diff --git a/.firebaserc b/.firebaserc new file mode 100644 index 0000000..6ddcd7f --- /dev/null +++ b/.firebaserc @@ -0,0 +1,3 @@ +{ + "projects": {} +} \ No newline at end of file diff --git a/.github/workflows/deploy_app.yml b/.github/workflows/deploy_app.yml index f6fa05a..9be7444 100644 --- a/.github/workflows/deploy_app.yml +++ b/.github/workflows/deploy_app.yml @@ -24,6 +24,7 @@ env: CONFIG_FILE: "app/app.yaml" SERVICE: audiora-app API_BASE_URL: ${{ secrets.API_BASE_URL }} + FIREBASE_CONFIG: ${{ secrets.FIREBASE_CONFIG }} jobs: prepare: diff --git a/api/requirements.txt b/api/requirements.txt index a7490ae..f10ddd5 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -30,4 +30,8 @@ uvicorn uvloop redis[hiredis] -async-web-search \ No newline at end of file +async-web-search + +lxml +beautifulsoup4 +pypdf[crypto] \ No newline at end of file diff --git a/api/src/main.py b/api/src/main.py index a5faa27..ae7297b 100644 --- a/api/src/main.py +++ b/api/src/main.py @@ -2,7 +2,7 @@ from time import time from typing import Any, Callable, Generator -from fastapi import BackgroundTasks, FastAPI, HTTPException, Request +from fastapi import BackgroundTasks, FastAPI, Form, HTTPException, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse from fastapi_utilities import add_timer_middleware @@ -13,6 +13,18 @@ SessionChatItem, SessionChatRequest, ) +from src.utils.custom_sources.base_utils import SourceContent +from src.utils.custom_sources.extract_url_content import ExtractURLContent, ExtractURLContentRequest +from src.utils.custom_sources.generate_url_source import ( + CustomSourceManager, + CustomSourceModel, + DeleteCustomSourcesRequest, + GenerateCustomSourceRequest, + GetCustomSourcesRequest, + generate_custom_source, +) +from src.utils.custom_sources.save_copied_source import CopiedPasteSourceRequest, save_copied_source +from src.utils.custom_sources.save_uploaded_sources import UploadedFiles from src.utils.generate_audiocast import ( GenerateAudioCastRequest, GenerateAudioCastResponse, @@ -54,12 +66,16 @@ async def log_request_headers(request: Request, call_next: Callable): @app.get("/") -async def root(): +def root(): return {"message": "Hello World"} @app.post("/chat/{session_id}", response_model=Generator[str, Any, None]) -async def chat_endpoint(session_id: str, request: SessionChatRequest, background_tasks: BackgroundTasks): +def chat_endpoint( + session_id: str, + request: SessionChatRequest, + background_tasks: BackgroundTasks, +): """Chat endpoint""" category = request.contentCategory db = SessionManager(session_id, category) @@ -85,18 +101,20 @@ async def generate_audiocast_endpoint( request: GenerateAudioCastRequest, background_tasks: BackgroundTasks, ): - result = await generate_audiocast(request, background_tasks) - return result + return await generate_audiocast(request, background_tasks) @app.get("/audiocast/{session_id}", response_model=GenerateAudioCastResponse) -async def get_audiocast_endpoint(session_id: str): +def get_audiocast_endpoint(session_id: str): result = get_audiocast(session_id) return result @app.post("/generate-audiocast-source", response_model=str) -async def generate_audiocast_source_endpoint(request: GenerateAudiocastSource, background_tasks: BackgroundTasks): +async def generate_audiocast_source_endpoint( + request: GenerateAudiocastSource, + background_tasks: BackgroundTasks, +): source_content = await generate_audiocast_source(request, background_tasks) if not source_content: raise HTTPException(status_code=500, detail="Failed to generate source content") @@ -132,6 +150,50 @@ async def get_signed_url_endpoint(blobname: str): @app.post("/get-session-title", response_model=str) -async def get_session_title_endpoint(request: GetSessionTitleModel, background_tasks: BackgroundTasks): - source_content = await get_session_title(request, background_tasks) - return source_content +async def get_session_title_endpoint( + request: GetSessionTitleModel, + background_tasks: BackgroundTasks, +): + return await get_session_title(request, background_tasks) + + +@app.post("/extract-url-content", response_model=SourceContent) +def extract_url_content_endpoint(request: ExtractURLContentRequest): + extractor = ExtractURLContent() + page_content = extractor._extract(request.url) + return page_content.model_dump() + + +@app.post("/generate-url-source", response_model=SourceContent) +def generate_url_source_endpoint( + request: GenerateCustomSourceRequest, + background_tasks: BackgroundTasks, +): + return generate_custom_source(request, background_tasks) + + +@app.post("/get-custom-sources", response_model=list[CustomSourceModel]) +async def get_custom_sources_endpoint(request: GetCustomSourcesRequest): + return CustomSourceManager(request.sessionId)._get_custom_sources() + + +@app.post("/delete-custom-source", response_model=list[CustomSourceModel]) +def delete_custom_source_endpoint(request: DeleteCustomSourcesRequest): + manager = CustomSourceManager(request.sessionId) + manager._delete_custom_source(request.sourceId) + return "Deleted" + + +@app.post("/save-copied-source", response_model=str) +def save_copied_source_endpoint(request: CopiedPasteSourceRequest): + result = save_copied_source(request) + return result + + +@app.post("/save-uploaded-sources", response_model=str) +async def save_uploaded_files_endpoint(files: list[UploadFile], sessionId: str = Form(...)): + """ + Save sources uploaded from the frontend + """ + result = await UploadedFiles(session_id=sessionId)._save_sources(files) + return result diff --git a/api/src/services/firestore_sdk.py b/api/src/services/firestore_sdk.py index cef9ee2..0a87c84 100644 --- a/api/src/services/firestore_sdk.py +++ b/api/src/services/firestore_sdk.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import Dict, Literal from firebase_admin.firestore import client, firestore @@ -48,3 +49,16 @@ def _get_document(self, collection: Collection, doc_id: str): def _get_documents(self, collection: Collection): return self._get_collection(collection).stream() + + @classmethod + def _safe_to_dict(cls, data: dict): + """ + safely parse firestore data by converting convert all timestamp to string + """ + + def _safe_to_str(value: dict | str | datetime): + if isinstance(value, datetime): + return value.strftime("%Y-%m-%d %H:%M:%S") + return value + + return {k: _safe_to_str(v) for k, v in data.items()} diff --git a/api/src/utils/audiocast_request.py b/api/src/utils/audiocast_request.py index ecda344..207767c 100644 --- a/api/src/utils/audiocast_request.py +++ b/api/src/utils/audiocast_request.py @@ -21,22 +21,20 @@ async def _run(self): Returns: str: The audiocast source content """ - source_content = await self.__use_openai(self.category, self.preference_summary) + additional_ctx = await self.get_context(self.preference_summary) + source_content = await self.__use_openai(self.category, self.preference_summary, additional_ctx) if not source_content: raise ValueError("Failed to generate audiocast source content") return self._refine(source_content) - async def __use_openai(self, category: ContentCategory, preference_summary: str): + async def __use_openai(self, category: ContentCategory, preference_summary: str, additional_ctx: str): """ Generate audiocast source content using OpenAI. """ refined_summary = re.sub("You want", "A user who wants", preference_summary, flags=re.IGNORECASE) refined_summary = re.sub("You", "A user", refined_summary, flags=re.IGNORECASE) - additional_context = await self.get_context(self.preference_summary) - print(f">>> Additional context: {additional_context}") - response = get_openai().chat.completions.create( model="gpt-4o", messages=[ @@ -45,7 +43,7 @@ async def __use_openai(self, category: ContentCategory, preference_summary: str) "content": generate_source_content_prompt( category, refined_summary, - additional_context, + additional_ctx, ), }, { diff --git a/api/src/utils/audiocast_script_maker.py b/api/src/utils/audiocast_script_maker.py index 5c64db1..827d840 100644 --- a/api/src/utils/audiocast_script_maker.py +++ b/api/src/utils/audiocast_script_maker.py @@ -13,9 +13,10 @@ class AudioScriptMaker: category: ContentCategory - def __init__(self, category: ContentCategory, source_content: str): + def __init__(self, category: ContentCategory, source_content: str, compiled_custom_sources: str | None = None): self.category = category self.source_content = source_content + self.compiled_custom_sources = compiled_custom_sources def create(self, provider: AudioScriptProvider = "openai"): """ @@ -28,9 +29,11 @@ def create(self, provider: AudioScriptProvider = "openai"): """ print("Generating audio script...") print(f"Category: {self.category}; Source content: {self.source_content}") + if self.compiled_custom_sources: + print(f"Custom sources: {self.compiled_custom_sources}") prompt_maker = TTSPromptMaker(self.category, Metadata()) - system_prompt = prompt_maker.get_system_prompt(self.source_content) + system_prompt = prompt_maker.get_system_prompt(self.source_content, self.compiled_custom_sources) if provider == "anthropic": audio_script = self.__use_anthropic(system_prompt) diff --git a/api/src/utils/audiocast_source_refiner.py b/api/src/utils/audiocast_source_refiner.py index 0147c31..b4f3184 100644 --- a/api/src/utils/audiocast_source_refiner.py +++ b/api/src/utils/audiocast_source_refiner.py @@ -10,7 +10,7 @@ def __init__(self, category: ContentCategory, preference_summary: str): self.category = category self.preference_summary = preference_summary - def _refine(self, content): + def _refine(self, content: str): """ Moderate and augment the source content to ensure it aligns with the user's preferences. """ diff --git a/api/src/utils/custom_sources/__init__.py b/api/src/utils/custom_sources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/src/utils/custom_sources/base_utils.py b/api/src/utils/custom_sources/base_utils.py new file mode 100644 index 0000000..6291f6e --- /dev/null +++ b/api/src/utils/custom_sources/base_utils.py @@ -0,0 +1,105 @@ +from typing import Literal, Optional, TypedDict, cast + +from google.cloud.firestore_v1 import DocumentReference +from pydantic import BaseModel + +from src.services.firestore_sdk import ( + Collection, + DBManager, + collections, +) + + +class SourceContent(BaseModel): + id: str + content: str + content_type: str + metadata: dict = {} + title: Optional[str] = None + + def __str__(self): + result = f"Content: {self.content}" + if self.title: + return f"Title: {self.title}\n{result}" + return result + + +class CustomSourceModel(SourceContent): + source_type: Literal["link", "copy/paste", "file_upload"] + url: Optional[str] = None + created_at: Optional[str] = None + updated_at: Optional[str] = None + + +class SourceContentDict(TypedDict): + id: str + content: str + content_type: str + metadata: dict + title: Optional[str] + + +class CustomSourceModelDict(SourceContentDict): + source_type: Literal["link", "copy/paste", "file_upload"] + url: Optional[str] + created_at: Optional[str] + updated_at: Optional[str] + + +class CustomSourceManager(DBManager): + collection: Collection = collections["audiora_sessions"] + sub_collection = "custom_sources" + + def __init__(self, session_id: str): + super().__init__() + self.doc_id = session_id + + def _check_document(self): + """if the collection does not exist, create it""" + doc = self._get_document(self.collection, self.doc_id) + if not doc.exists: + raise Exception("Session not found") + return doc + + def _get_doc_ref(self, source_id: str) -> DocumentReference: + self._check_document() + return ( + self._get_collection(self.collection) + .document(self.doc_id) + .collection(self.sub_collection) + .document(source_id) + ) + + def _set_custom_source(self, data: CustomSourceModel): + return self._get_doc_ref(data.id).set( + { + **(data.model_dump()), + "created_at": self._timestamp, + "updated_at": self._timestamp, + } + ) + + def _get_custom_source(self, source_id: str) -> CustomSourceModel | None: + doc = self._get_doc_ref(source_id).get() + data = doc.to_dict() + if doc.exists and data: + return cast(CustomSourceModel, self._safe_to_dict(data)) + + def _get_custom_sources(self) -> list[CustomSourceModelDict]: + self._check_document() + + try: + session_ref = self._get_collection(self.collection).document(self.doc_id) + docs = session_ref.collection(self.sub_collection).get() + return [ + cast(CustomSourceModelDict, self._safe_to_dict(doc.to_dict())) + for doc in docs + if doc.exists and doc.to_dict() + ] + + except Exception as e: + print(f"Error getting custom sources for Session: {self.doc_id}", e) + return [] + + def _delete_custom_source(self, source_id: str): + return self._get_doc_ref(source_id).delete() diff --git a/api/src/utils/custom_sources/extract_url_content.py b/api/src/utils/custom_sources/extract_url_content.py new file mode 100644 index 0000000..d51f4b4 --- /dev/null +++ b/api/src/utils/custom_sources/extract_url_content.py @@ -0,0 +1,70 @@ +from urllib.parse import urlparse +from uuid import uuid4 + +import httpx +from bs4 import BeautifulSoup, Tag +from pydantic import BaseModel + +from src.utils.decorators import process_time + +from .base_utils import SourceContent +from .read_content import ReadContent + + +class ExtractURLContentRequest(BaseModel): + url: str + + +class ExtractURLContent(ReadContent): + def __init__(self, timeout: int = 10): + self.timeout = timeout + + def _clean_text(self, text: str) -> str: + """TODO: write text cleaning logic""" + return text.strip() + + def _extract_pdf(self, content: bytes) -> tuple[str, dict]: + text_content, pdf_reader = self._read_pdf(content) + metadata = {**(pdf_reader.metadata or {}), "pages": pdf_reader.get_num_pages()} + return self._clean_text(text_content), metadata + + def _extract_html(self, content: bytes) -> tuple[str, dict]: + soup = BeautifulSoup(content, "lxml") + for element in soup(["script", "style", "nav", "footer"]): + element.decompose() + + text_content = soup.get_text(separator="\n\n", strip=True) + descr_tag = soup.find("meta", {"name": "description"}) + metadata = { + "title": soup.title.string if soup.title else "", + "description": descr_tag.get("content") if isinstance(descr_tag, Tag) else "", + } + + return self._clean_text(text_content), metadata + + @process_time() + def _extract(self, url: str) -> SourceContent: + parsed_url = urlparse(url) + if not parsed_url.scheme or not parsed_url.netloc: + raise ValueError("Invalid URL provided") + + try: + response = httpx.get(url) + response.raise_for_status() + content_type = response.headers.get("content-type", "").lower() + + if url.lower().endswith(".pdf") or "application/pdf" in content_type: + text, metadata = self._extract_pdf(response.content) + content_type = "application/pdf" + else: + text, metadata = self._extract_html(response.content) + content_type = "text/html" + + return SourceContent( + id=str(uuid4()), + content=text, + content_type=content_type, + metadata=metadata, + ) + except Exception as e: + raise Exception(f"Failed to extract content: {str(e)}") diff --git a/api/src/utils/custom_sources/generate_url_source.py b/api/src/utils/custom_sources/generate_url_source.py new file mode 100644 index 0000000..8a61860 --- /dev/null +++ b/api/src/utils/custom_sources/generate_url_source.py @@ -0,0 +1,37 @@ +from fastapi import BackgroundTasks +from pydantic import BaseModel + +from src.utils.custom_sources.extract_url_content import ExtractURLContent + +from .base_utils import CustomSourceManager, CustomSourceModel + + +class GenerateCustomSourceRequest(BaseModel): + url: str + sessionId: str + + +class GetCustomSourcesRequest(BaseModel): + sessionId: str + + +class DeleteCustomSourcesRequest(BaseModel): + sessionId: str + sourceId: str + + +def generate_custom_source(request: GenerateCustomSourceRequest, background_tasks: BackgroundTasks): + extractor = ExtractURLContent() + content = extractor._extract(request.url) + + def save_to_firestore(): + custom_source = CustomSourceModel( + **content.model_dump(), + url=request.url, + source_type="link", + ) + manager = CustomSourceManager(request.sessionId) + manager._set_custom_source(custom_source) + + background_tasks.add_task(save_to_firestore) + return content.model_dump() diff --git a/api/src/utils/custom_sources/read_content.py b/api/src/utils/custom_sources/read_content.py new file mode 100644 index 0000000..295fb9d --- /dev/null +++ b/api/src/utils/custom_sources/read_content.py @@ -0,0 +1,21 @@ +from io import BytesIO + +from pypdf import PdfReader + + +class ReadContent: + def _read_pdf(self, content: bytes) -> tuple[str, PdfReader]: + pdf_reader = PdfReader(BytesIO(content)) + + pages: list[str] = [] + for page in pdf_reader.pages: + text = page.extract_text() + # Split into paragraphs and clean + paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()] + pages.append("\n\n".join(paragraphs)) + + text_content = "\n\n".join(pages).strip() + return text_content, pdf_reader + + def _read_txt(self, content: bytes) -> str: + return content.decode() diff --git a/api/src/utils/custom_sources/save_copied_source.py b/api/src/utils/custom_sources/save_copied_source.py new file mode 100644 index 0000000..260c525 --- /dev/null +++ b/api/src/utils/custom_sources/save_copied_source.py @@ -0,0 +1,22 @@ +from uuid import uuid4 + +from pydantic import BaseModel + +from .base_utils import CustomSourceManager, CustomSourceModel + + +class CopiedPasteSourceRequest(BaseModel): + sessionId: str + text: str + + +def save_copied_source(request: CopiedPasteSourceRequest): + custom_source = CustomSourceModel( + id=str(uuid4()), + content=request.text, + content_type="text/plain", + source_type="copy/paste", + ) + + CustomSourceManager(request.sessionId)._set_custom_source(custom_source) + return "Saved" diff --git a/api/src/utils/custom_sources/save_uploaded_sources.py b/api/src/utils/custom_sources/save_uploaded_sources.py new file mode 100644 index 0000000..28f2882 --- /dev/null +++ b/api/src/utils/custom_sources/save_uploaded_sources.py @@ -0,0 +1,52 @@ +from uuid import uuid4 + +from fastapi import UploadFile + +from .base_utils import CustomSourceManager, CustomSourceModel, SourceContent +from .read_content import ReadContent + +TEN_MB = 10 * 1024 * 1024 + + +class UploadedFiles: + def __init__(self, session_id: str): + self.session_id = session_id + self.content_reader = ReadContent() + + async def _extract_content(self, file: UploadFile): + file_bytes = await file.read() + # ensure file size is less than 10MB + if len(file_bytes) > TEN_MB: + return None + + if file.content_type == "application/pdf": + text_content, pdf_reader = self.content_reader._read_pdf(file_bytes) + + metadata = {**(pdf_reader.metadata or {}), "pages": pdf_reader.get_num_pages()} + content_type = "application/pdf" + elif file.content_type == "text/plain": + text_content = self.content_reader._read_txt(file_bytes) + + metadata = {} + content_type = "text/plain" + else: + return None + + return SourceContent( + id=str(uuid4()), + content=text_content, + content_type=content_type, + metadata=metadata, + title=file.filename, + ) + + async def _save_sources(self, files: list[UploadFile]): + manager = CustomSourceManager(self.session_id) + + for file in files: + content = await self._extract_content(file) + if content: + custom_source = CustomSourceModel(**content.model_dump(), source_type="file_upload") + manager._set_custom_source(custom_source) + + return "Saved" diff --git a/api/src/utils/generate_audiocast.py b/api/src/utils/generate_audiocast.py index fd46725..6c1b02e 100644 --- a/api/src/utils/generate_audiocast.py +++ b/api/src/utils/generate_audiocast.py @@ -9,11 +9,40 @@ GenerateAudioCastRequest, GenerateAudioCastResponse, ) +from src.utils.chat_utils import ContentCategory +from src.utils.custom_sources.base_utils import CustomSourceManager from src.utils.generate_audiocast_source import GenerateAudiocastSource, generate_audiocast_source from src.utils.session_manager import SessionManager from src.utils.waveform_utils import WaveformUtils +def compile_custom_sources(session_id: str): + sources = CustomSourceManager(session_id)._get_custom_sources() + return "\n\n".join([str(source) for source in sources if source["content"]]) + + +def post_generate_audio( + session_id: str, + category: ContentCategory, + audio_path: str, + audio_script: str, +): + try: + # Store audio + storage_manager = StorageManager() + storage_manager.upload_audio_to_gcs(audio_path, session_id) + + # Update session metadata + db = SessionManager(session_id, category) + db._update_transcript(audio_script) + + # Generate and save audio waveform as mp4 + waveform_utils = WaveformUtils(session_id, audio_path) + waveform_utils.run_all() + except Exception as e: + print(f"Error in generate_audiocast background_tasks: {str(e)}") + + async def generate_audiocast(request: GenerateAudioCastRequest, background_tasks: BackgroundTasks): """## Generate audiocast based on a summary of user's request @@ -21,34 +50,42 @@ async def generate_audiocast(request: GenerateAudioCastRequest, background_tasks 1. Generate source content 2. Generate audio script 3. Generate audio - 4a. Store audio - 4b. Store the audio waveform on GCS + 4. a) Store audio. b) Store the audio waveform on GCS 5. Update session """ summary = request.summary category = request.category session_id = request.sessionId - source_content = await generate_audiocast_source( - GenerateAudiocastSource( - sessionId=session_id, - category=category, - preferenceSummary=summary, - ), - background_tasks, - ) - db = SessionManager(session_id, category) def update_session_info(info: str): background_tasks.add_task(db._update_info, info) + session_data = SessionManager.data(session_id) + source_content = session_data.metadata.source if session_data and session_data.metadata else None + + if not source_content: + update_session_info("Generating source content...") + source_content = await generate_audiocast_source( + GenerateAudiocastSource( + sessionId=session_id, + category=category, + preferenceSummary=summary, + ), + background_tasks, + ) + if not source_content: raise HTTPException(status_code=500, detail="Failed to generate source content") + # get custom sources + update_session_info("Checking for custom sources...") + compiled_custom_sources = compile_custom_sources(session_id) + # Generate audio script update_session_info("Generating audio script...") - script_maker = AudioScriptMaker(category, source_content) + script_maker = AudioScriptMaker(category, source_content, compiled_custom_sources) audio_script = script_maker.create(provider="gemini") if not audio_script: @@ -59,24 +96,13 @@ def update_session_info(info: str): audio_manager = AudioManager(custom_config=AudioManagerConfig(tts_provider="openai")) audio_path = await audio_manager.generate_speech(audio_script) - def _run_on_background(): - try: - # Store audio - storage_manager = StorageManager() - storage_manager.upload_audio_to_gcs(audio_path, session_id) - - # Update session metadata - db._update_source(source_content) - db._update_transcript(audio_script) - # TODO: add one to update title - - # Generate and save audio waveform as mp4 - waveform_utils = WaveformUtils(session_id, audio_path) - waveform_utils.run_all() - except Exception as e: - print(f"Error in generate_audiocast background_tasks: {str(e)}") - - background_tasks.add_task(_run_on_background) + background_tasks.add_task( + post_generate_audio, + session_id, + category, + audio_path, + audio_script, + ) session_data = SessionManager.data(session_id) if not session_data: diff --git a/api/src/utils/generate_audiocast_source.py b/api/src/utils/generate_audiocast_source.py index d637e67..c80df70 100644 --- a/api/src/utils/generate_audiocast_source.py +++ b/api/src/utils/generate_audiocast_source.py @@ -27,13 +27,12 @@ async def generate_audiocast_source(request: GenerateAudiocastSource, background @use_cache_manager(cache_key) async def _handler(): - def update_session_info(info: str): - db = SessionManager(session_id, category) - background_tasks.add_task(db._update_info, info) + db = SessionManager(session_id, category) + background_tasks.add_task(db._update_info, "Generating source content...") - update_session_info("Generating source content...") - source_content_generator = GenerateSourceContent(category, preference_summary) - source_content = await source_content_generator._run() + generator = GenerateSourceContent(category, preference_summary) + source_content = await generator._run() + background_tasks.add_task(db._update_source, source_content) return source_content diff --git a/api/src/utils/prompt_templates/tts_prompt.py b/api/src/utils/prompt_templates/tts_prompt.py index 474d1fa..697e53c 100644 --- a/api/src/utils/prompt_templates/tts_prompt.py +++ b/api/src/utils/prompt_templates/tts_prompt.py @@ -19,15 +19,17 @@ def get_tags(self) -> list[str]: """Get connection tags based on the number of speakers.""" return [f"" for i in range(1, 10)] - def get_system_prompt(self, source_content: str) -> str: + def get_system_prompt(self, source_content: str, compiled_custom_sources: str | None = None) -> str: """ - Generate an optimized system prompt for converting a source content into the appropriate format. + Generate an optimized system prompt for converting source contents into the appropriate format. """ - return f"""You're a super-intelligent AI who generates different forms, styles and genres of audiocast script. + return f"""You're a super-intelligent AI that can generate different forms, styles and genres of audiocast script. - Your task is to transform the following source content into an engaging {self.category} TTS-optimized audiocast script. + Your task is to transform the following source contents into a single, engaging {self.category} TTS-optimized audiocast script. - Source Content: {source_content} + Source Contents: + - AI-generated: {source_content} + {"- User-provided: " + compiled_custom_sources if compiled_custom_sources else ""} Content Parameters: 1. Format: Create {category_qualifiers[self.category]} in TTS-optomized audiocast flow @@ -77,15 +79,20 @@ def get_system_prompt(self, source_content: str) -> str: - Verify SSML tag accuracy, opening and closure - Check speaker tag consistency - Clear, accessible language - - Maintain accurate representation of source content, don't deviate + - Maintain accurate representation of the source contents, don't deviate - Appropriate word counnt for {self.category} format - Generate only the audiocast transcript - Ensure all SSML tags are properly formatted and within the speaker tags + 6. Using the Source Contents: + - Treat the source contents as complimentary, transitioning smoothly between sections and segments + - Exclude any unnecessary or incompatible details or information + - Prioritize sections that greatly support the overall discussion/narrative + Output Format Example for 2 speakers: Hello there! [Content Intro & Overview]. I'm particularly excited about [Specific Aspect]. What caught your attention about this? Well what really stands out is [Key Point]... - Remember: Focus solely on conveying the source content in an engaging audio format while optimizing for audio delivery. + Remember: Keep the final output engaging and optimized for audio delivery. """ diff --git a/app/package.json b/app/package.json index 98fa045..02aaad1 100644 --- a/app/package.json +++ b/app/package.json @@ -26,12 +26,14 @@ "dayjs": "^1.11.13", "dotenv": "16.3.1", "ejs": "^3.1.10", + "firebase": "^11.0.2", "github-slugger": "^2.0.0", "lucide-svelte": "^0.456.0", "marked": "^14.1.4", "mode-watcher": "^0.4.1", "nanoid": "^5.0.7", "ramda": "^0.30.1", + "rxjs": "^7.8.1", "svelte-persisted-store": "^0.11.0", "svelte-sonner": "^0.3.28", "tailwind-merge": "^2.4.0", diff --git a/app/src/global.d.ts b/app/src/global.d.ts index fc5d3cc..658fdba 100644 --- a/app/src/global.d.ts +++ b/app/src/global.d.ts @@ -1,5 +1,6 @@ declare namespace NodeJS { export interface ProcessEnv { NODE_ENV: 'development' | 'production'; + FIREBASE_CONFIG: object & { apiKey: string }; } } diff --git a/app/src/hooks.server.ts b/app/src/hooks.server.ts index 956ecb8..79b78e9 100644 --- a/app/src/hooks.server.ts +++ b/app/src/hooks.server.ts @@ -1,3 +1,4 @@ +import 'dotenv/config'; import type { HandleServerError, Handle } from '@sveltejs/kit'; import { sequence } from '@sveltejs/kit/hooks'; import ejs from 'ejs'; diff --git a/app/src/lib/components/ChatListActionItems.svelte b/app/src/lib/components/ChatListActionItems.svelte index d47d753..9074fa7 100644 --- a/app/src/lib/components/ChatListActionItems.svelte +++ b/app/src/lib/components/ChatListActionItems.svelte @@ -11,11 +11,11 @@ import { toast } from 'svelte-sonner'; import { Button } from './ui/button'; import { getSessionContext } from '@/stores/sessionContext.svelte'; - import RenderAudioSource from '@/components/RenderAudioSource.svelte'; import { streamingResponse } from '@/utils/streamingResponse'; import { Share2Icon } from 'lucide-svelte'; import ShareModal from './share/ShareModal.svelte'; import { getShareableLink, getShareTitle } from '@/utils/shareMeta'; + import ManageAudioSourceDrawer from './ManageAudioSourceDrawer.svelte'; export let sessionId: string; export let category: ContentCategory; @@ -133,7 +133,7 @@ > {#if $audioSource$} - + {:else} + + + + + + + + Audiocast Source + + +
+
+ {#key accordionResetKey} + + + +
+ Add custom source + {#if overMaxSources} + + You've reached a max of {MAX_ALLOWABLE_SOURCES} custom sources + + {/if} +
+ + + +
+ + createURLContent(detail.url)} + on:submitCopyPaste={({ detail }) => createCopyPasteContent(detail.text)} + on:submitFiles={({ detail }) => uploadValidatedFiles(detail.files)} + /> + +
+ + +
+ {/key} +
+
+
+
+ + + diff --git a/app/src/lib/components/RenderAudioSource.svelte b/app/src/lib/components/RenderAudioSource.svelte deleted file mode 100644 index 94d2f11..0000000 --- a/app/src/lib/components/RenderAudioSource.svelte +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - Audiocast Source - - -
-
-
- {#await parse(audioSource) then parsedContent} - {@html parsedContent} - {/await} -
-
-
-
-
-
diff --git a/app/src/lib/components/RenderAudioSources.svelte b/app/src/lib/components/RenderAudioSources.svelte new file mode 100644 index 0000000..e02c35a --- /dev/null +++ b/app/src/lib/components/RenderAudioSources.svelte @@ -0,0 +1,30 @@ + + + + + AI-generated Source + +
+ {#await parse(aiSource) then parsedContent} + {@html parsedContent} + {/await} +
+
+
+ + +
+ + diff --git a/app/src/lib/components/custom-source/AddCustomSource.svelte b/app/src/lib/components/custom-source/AddCustomSource.svelte new file mode 100644 index 0000000..9198fc5 --- /dev/null +++ b/app/src/lib/components/custom-source/AddCustomSource.svelte @@ -0,0 +1,26 @@ + + +
+ {#if generatingSource} + + {:else if showWebsiteURLForm} + (showWebsiteURLForm = false)} on:submitURL /> + {:else if showCopyPasteForm} + (showCopyPasteForm = false)} on:submitCopyPaste /> + {:else} + (showWebsiteURLForm = true)} + on:useCopyPaste={() => (showCopyPasteForm = true)} + on:submitFiles + /> + {/if} +
diff --git a/app/src/lib/components/custom-source/AddCustomSourceForm.svelte b/app/src/lib/components/custom-source/AddCustomSourceForm.svelte new file mode 100644 index 0000000..4c0005c --- /dev/null +++ b/app/src/lib/components/custom-source/AddCustomSourceForm.svelte @@ -0,0 +1,156 @@ + + + + + + + Custom source + + Let's base Audiora's responses on the information that matters most to you. (E.g., marketing + plans, research notes, meeting transcripts, etc.) + + + + + +
+ + + +
+
+
diff --git a/app/src/lib/components/custom-source/CopyPasteSource.svelte b/app/src/lib/components/custom-source/CopyPasteSource.svelte new file mode 100644 index 0000000..23fa2dd --- /dev/null +++ b/app/src/lib/components/custom-source/CopyPasteSource.svelte @@ -0,0 +1,82 @@ + + +
+ + +
+
+ +

Paste copied text

+
+ +
+ +

Paste your copied text below to upload as a source

+
+ + +
+ + +
+