diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000000..e57283f0377
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+## Description
+[Provide a brief description of the changes in this PR]
+
+
+## How Has This Been Tested?
+[Describe the tests you ran to verify your changes]
+
+
+## Accepted Risk
+[Any known risks or failure modes to point out to reviewers]
+
+
+## Related Issue(s)
+[If applicable, link to the issue(s) this PR addresses]
+
+
+## Checklist:
+- [ ] All of the automated tests pass
+- [ ] All PR comments are addressed and marked resolved
+- [ ] If there are migrations, they have been rebased to latest main
+- [ ] If there are new dependencies, they are added to the requirements
+- [ ] If there are new environment variables, they are added to all of the deployment methods
+- [ ] If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS
+- [ ] Docker images build and basic functionalities work
+- [ ] Author has done a final read through of the PR right before merge
diff --git a/.github/workflows/docker-build-backend-container-on-merge-group.yml b/.github/workflows/docker-build-backend-container-on-merge-group.yml
deleted file mode 100644
index 57ab29b00bf..00000000000
--- a/.github/workflows/docker-build-backend-container-on-merge-group.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Build Backend Image on Merge Group
-
-on:
-  pull_request:
-    branches: [ "main" ]
-  merge_group:
-    types: [checks_requested]
-
-env:
-  REGISTRY_IMAGE: danswer/danswer-backend
-
-jobs:
-  build:
-    # TODO: make this a matrix build like the web containers
-    runs-on:
-      group: amd64-image-builders
-
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v2
-
-    - name: Set up Docker Buildx
-      uses: docker/setup-buildx-action@v3
-
-    - name: Backend Image Docker Build
-      uses: docker/build-push-action@v5
-      with:
-        context: ./backend
-        file: ./backend/Dockerfile
-        platforms: linux/amd64,linux/arm64
-        push: false
-        tags: |
-          ${{ env.REGISTRY_IMAGE }}:latest
-        build-args: |
-          DANSWER_VERSION=v0.0.1
diff --git a/.github/workflows/docker-build-push-model-server-container-on-tag.yml b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
index 104b891d768..134b77d43c2 100644
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -7,7 +7,8 @@ on:
 
 jobs:
   build-and-push:
-    runs-on: ubuntu-latest
+    runs-on:
+      group: amd64-image-builders
 
     steps:
     - name: Checkout code
diff --git a/.github/workflows/docker-build-web-container-on-merge-group.yml b/.github/workflows/docker-build-web-container-on-merge-group.yml
deleted file mode 100644
index 7b975f476d9..00000000000
--- a/.github/workflows/docker-build-web-container-on-merge-group.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: Build Web Image on Merge Group
-
-on:
-  pull_request:
-    branches: [ "main" ]
-  merge_group:
-    types: [checks_requested]
-
-env:
-  REGISTRY_IMAGE: danswer/danswer-web-server
-
-jobs:
-  build:
-    runs-on:
-      group: ${{ matrix.platform == 'linux/amd64' && 'amd64-image-builders' || 'arm64-image-builders' }}
-    strategy:
-      fail-fast: false
-      matrix:
-        platform:
-          - linux/amd64
-          - linux/arm64
-
-    steps:
-      - name: Prepare
-        run: |
-          platform=${{ matrix.platform }}
-          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
-
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-
images: ${{ env.REGISTRY_IMAGE }} - tags: | - type=raw,value=${{ env.REGISTRY_IMAGE }}:latest - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build by digest - id: build - uses: docker/build-push-action@v5 - with: - context: ./web - file: ./web/Dockerfile - platforms: ${{ matrix.platform }} - push: false - build-args: | - DANSWER_VERSION=v0.0.1 - # needed due to weird interactions with the builds for different platforms - no-cache: true - labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/pr-python-checks.yml b/.github/workflows/pr-python-checks.yml index 6c604e93d43..9cc624fa073 100644 --- a/.github/workflows/pr-python-checks.yml +++ b/.github/workflows/pr-python-checks.yml @@ -1,6 +1,7 @@ name: Python Checks on: + merge_group: pull_request: branches: [ main ] diff --git a/.github/workflows/pr-python-tests.yml b/.github/workflows/pr-python-tests.yml index beb890a35f6..7686de019a5 100644 --- a/.github/workflows/pr-python-tests.yml +++ b/.github/workflows/pr-python-tests.yml @@ -1,6 +1,7 @@ name: Python Unit Tests on: + merge_group: pull_request: branches: [ main ] diff --git a/.github/workflows/pr-quality-checks.yml b/.github/workflows/pr-quality-checks.yml index 8e7de1cdd59..8a42541ea5d 100644 --- a/.github/workflows/pr-quality-checks.yml +++ b/.github/workflows/pr-quality-checks.yml @@ -4,18 +4,19 @@ concurrency: cancel-in-progress: true on: + merge_group: pull_request: null jobs: quality-checks: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - uses: pre-commit/action@v3.0.0 - with: - extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }} + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - uses: pre-commit/action@v3.0.0 + with: + extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }} diff --git a/.github/workflows/run-it.yml b/.github/workflows/run-it.yml new file mode 100644 index 00000000000..7c0c1814c3b --- /dev/null +++ b/.github/workflows/run-it.yml @@ -0,0 +1,172 @@ +name: Run Integration Tests +concurrency: + group: Run-Integration-Tests-${{ github.head_ref }} + cancel-in-progress: true + +on: + merge_group: + pull_request: + branches: [ main ] + +env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + +jobs: + integration-tests: + runs-on: + group: 'arm64-image-builders' + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Build Web Docker image + uses: docker/build-push-action@v5 + with: + context: ./web + file: ./web/Dockerfile + platforms: linux/arm64 + pull: true + push: true + load: true + tags: danswer/danswer-web-server:it + cache-from: type=registry,ref=danswer/danswer-web-server:it + cache-to: | + type=registry,ref=danswer/danswer-web-server:it,mode=max + type=inline + + - name: Build Backend Docker image + uses: docker/build-push-action@v5 + with: + context: ./backend + file: ./backend/Dockerfile + platforms: linux/arm64 + pull: true + push: true + load: true + tags: danswer/danswer-backend:it + cache-from: 
type=registry,ref=danswer/danswer-backend:it + cache-to: | + type=registry,ref=danswer/danswer-backend:it,mode=max + type=inline + + - name: Build Model Server Docker image + uses: docker/build-push-action@v5 + with: + context: ./backend + file: ./backend/Dockerfile.model_server + platforms: linux/arm64 + pull: true + push: true + load: true + tags: danswer/danswer-model-server:it + cache-from: type=registry,ref=danswer/danswer-model-server:it + cache-to: | + type=registry,ref=danswer/danswer-model-server:it,mode=max + type=inline + + - name: Build integration test Docker image + uses: docker/build-push-action@v5 + with: + context: ./backend + file: ./backend/tests/integration/Dockerfile + platforms: linux/arm64 + pull: true + push: true + load: true + tags: danswer/integration-test-runner:it + cache-from: type=registry,ref=danswer/integration-test-runner:it + cache-to: | + type=registry,ref=danswer/integration-test-runner:it,mode=max + type=inline + + - name: Start Docker containers + run: | + cd deployment/docker_compose + ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ + IMAGE_TAG=it \ + docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build + id: start_docker + + - name: Wait for service to be ready + run: | + echo "Starting wait-for-service script..." + + start_time=$(date +%s) + timeout=300 # 5 minutes in seconds + + while true; do + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + + if [ $elapsed_time -ge $timeout ]; then + echo "Timeout reached. Service did not become ready in 5 minutes." + exit 1 + fi + + # Use curl with error handling to ignore specific exit code 56 + response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error") + + if [ "$response" = "200" ]; then + echo "Service is ready!" + break + elif [ "$response" = "curl_error" ]; then + echo "Curl encountered an error, possibly exit code 56. Continuing to retry..." + else + echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..." + fi + + sleep 5 + done + echo "Finished waiting for service." + + - name: Run integration tests + run: | + echo "Running integration tests..." + docker run --rm --network danswer-stack_default \ + -e POSTGRES_HOST=relational_db \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_DB=postgres \ + -e VESPA_HOST=index \ + -e API_SERVER_HOST=api_server \ + -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + danswer/integration-test-runner:it + continue-on-error: true + id: run_tests + + - name: Check test results + run: | + if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then + echo "Integration tests failed. Exiting with error." + exit 1 + else + echo "All integration tests passed successfully." + fi + + - name: Save Docker logs + if: success() || failure() + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log + mv docker-compose.log ${{ github.workspace }}/docker-compose.log + + - name: Upload logs + if: success() || failure() + uses: actions/upload-artifact@v3 + with: + name: docker-logs + path: ${{ github.workspace }}/docker-compose.log + + - name: Stop Docker containers + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack down -v diff --git a/.gitignore b/.gitignore index 68f5348e427..15bed8a5983 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ /deployment/data/nginx/app.conf .vscode/launch.json *.sw? 
+/backend/tests/regression/answer_quality/search_test_config.yaml diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt index 015672a226c..b3fae8cee73 100644 --- a/.vscode/env_template.txt +++ b/.vscode/env_template.txt @@ -15,7 +15,7 @@ LOG_LEVEL=debug # This passes top N results to LLM an additional time for reranking prior to answer generation # This step is quite heavy on token usage so we disable it for dev generally -DISABLE_LLM_CHUNK_FILTER=True +DISABLE_LLM_DOC_RELEVANCE=True # Useful if you want to toggle auth on/off (google_oauth/OIDC specifically) diff --git a/.vscode/launch.template.jsonc b/.vscode/launch.template.jsonc index a4be80fc1c9..9aaadb32acf 100644 --- a/.vscode/launch.template.jsonc +++ b/.vscode/launch.template.jsonc @@ -39,7 +39,8 @@ "--reload", "--port", "9000" - ] + ], + "consoleTitle": "Model Server" }, { "name": "API Server", @@ -58,7 +59,8 @@ "--reload", "--port", "8080" - ] + ], + "consoleTitle": "API Server" }, { "name": "Indexing", @@ -68,11 +70,12 @@ "cwd": "${workspaceFolder}/backend", "envFile": "${workspaceFolder}/.env", "env": { - "ENABLE_MINI_CHUNK": "false", + "ENABLE_MULTIPASS_INDEXING": "false", "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", "PYTHONPATH": "." - } + }, + "consoleTitle": "Indexing" }, // Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev { @@ -90,7 +93,8 @@ }, "args": [ "--no-indexing" - ] + ], + "consoleTitle": "Background Jobs" }, // For the listner to access the Slack API, // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project @@ -125,5 +129,17 @@ //"tests/unit/danswer/llm/answering/test_prune_and_merge.py" ] } + ], + "compounds": [ + { + "name": "Run Danswer", + "configurations": [ + "Web Server", + "Model Server", + "API Server", + "Indexing", + "Background Jobs", + ] + } ] } diff --git a/README.md b/README.md index 8d2b362011d..aff3cd57d5a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Documentation - + Slack diff --git a/backend/Dockerfile b/backend/Dockerfile index 7f9daad94a3..17e0be8c239 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -68,7 +68,9 @@ RUN apt-get update && \ rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key # Pre-downloading models for setups with limited egress -RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')" +RUN python -c "from tokenizers import Tokenizer; \ +Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')" + # Pre-downloading NLTK for setups with limited egress RUN python -c "import nltk; \ diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server index 89f24e2ac26..f2fb1ca44d0 100644 --- a/backend/Dockerfile.model_server +++ b/backend/Dockerfile.model_server @@ -18,14 +18,22 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \ apt-get autoremove -y # Pre-downloading models for setups with limited egress -RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \ -from huggingface_hub import snapshot_download; \ -AutoTokenizer.from_pretrained('danswer/intent-model'); \ -AutoTokenizer.from_pretrained('intfloat/e5-base-v2'); \ +# Download tokenizers, distilbert for the Danswer model +# Download model weights +# Run Nomic to pull in the custom architecture and have it cached locally +RUN python -c "from transformers import AutoTokenizer; \ 
+AutoTokenizer.from_pretrained('distilbert-base-uncased'); \ AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \ -snapshot_download('danswer/intent-model'); \ -snapshot_download('intfloat/e5-base-v2'); \ -snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')" +from huggingface_hub import snapshot_download; \ +snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \ +snapshot_download('nomic-ai/nomic-embed-text-v1'); \ +snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \ +from sentence_transformers import SentenceTransformer; \ +SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" + +# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while +# running Danswer, don't overwrite it with the built in cache folder +RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface WORKDIR /app diff --git a/backend/alembic/env.py b/backend/alembic/env.py index 8a944689de4..8c028202bfc 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -8,6 +8,7 @@ from sqlalchemy.engine import Connection from sqlalchemy.ext.asyncio import create_async_engine from celery.backends.database.session import ResultModelBase # type: ignore +from sqlalchemy.schema import SchemaItem # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -29,6 +30,20 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. +EXCLUDE_TABLES = {"kombu_queue", "kombu_message"} + + +def include_object( + object: SchemaItem, + name: str, + type_: str, + reflected: bool, + compare_to: SchemaItem | None, +) -> bool: + if type_ == "table" and name in EXCLUDE_TABLES: + return False + return True + def run_migrations_offline() -> None: """Run migrations in 'offline' mode. @@ -55,7 +70,11 @@ def run_migrations_offline() -> None: def do_run_migrations(connection: Connection) -> None: - context.configure(connection=connection, target_metadata=target_metadata) # type: ignore + context.configure( + connection=connection, + target_metadata=target_metadata, # type: ignore + include_object=include_object, + ) # type: ignore with context.begin_transaction(): context.run_migrations() diff --git a/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py b/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py index 2d8912d9bcf..d0b90da0232 100644 --- a/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py +++ b/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py @@ -17,15 +17,11 @@ def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### op.add_column( "chat_session", sa.Column("current_alternate_model", sa.String(), nullable=True), ) - # ### end Alembic commands ### def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### op.drop_column("chat_session", "current_alternate_model") - # ### end Alembic commands ### diff --git a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py new file mode 100644 index 00000000000..69eec4c108e --- /dev/null +++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py @@ -0,0 +1,32 @@ +"""add search doc relevance details + +Revision ID: 05c07bf07c00 +Revises: b896bbd0d5a7 +Create Date: 2024-07-10 17:48:15.886653 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "05c07bf07c00" +down_revision = "b896bbd0d5a7" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "search_doc", + sa.Column("is_relevant", sa.Boolean(), nullable=True), + ) + op.add_column( + "search_doc", + sa.Column("relevance_explanation", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("search_doc", "relevance_explanation") + op.drop_column("search_doc", "is_relevant") diff --git a/backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py b/backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py new file mode 100644 index 00000000000..3f4893bd12c --- /dev/null +++ b/backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py @@ -0,0 +1,26 @@ +"""add_indexing_start_to_connector + +Revision ID: 08a1eda20fe1 +Revises: 8a87bd6ec550 +Create Date: 2024-07-23 11:12:39.462397 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "08a1eda20fe1" +down_revision = "8a87bd6ec550" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "connector", sa.Column("indexing_start", sa.DateTime(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("connector", "indexing_start") diff --git a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py new file mode 100644 index 00000000000..42f4c22ed78 --- /dev/null +++ b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py @@ -0,0 +1,135 @@ +"""embedding model -> search settings + +Revision ID: 1f60f60c3401 +Revises: f17bf3b0d9f1 +Create Date: 2024-08-25 12:39:51.731632 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS + +# revision identifiers, used by Alembic. 
+revision = "1f60f60c3401" +down_revision = "f17bf3b0d9f1" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.drop_constraint( + "index_attempt__embedding_model_fk", "index_attempt", type_="foreignkey" + ) + # Rename the table + op.rename_table("embedding_model", "search_settings") + + # Add new columns + op.add_column( + "search_settings", + sa.Column( + "multipass_indexing", sa.Boolean(), nullable=False, server_default="true" + ), + ) + op.add_column( + "search_settings", + sa.Column( + "multilingual_expansion", + postgresql.ARRAY(sa.String()), + nullable=False, + server_default="{}", + ), + ) + op.add_column( + "search_settings", + sa.Column( + "disable_rerank_for_streaming", + sa.Boolean(), + nullable=False, + server_default="false", + ), + ) + op.add_column( + "search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True) + ) + op.add_column( + "search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True) + ) + op.add_column( + "search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True) + ) + op.add_column( + "search_settings", + sa.Column( + "num_rerank", + sa.Integer(), + nullable=False, + server_default=str(NUM_POSTPROCESSED_RESULTS), + ), + ) + + # Add the new column as nullable initially + op.add_column( + "index_attempt", sa.Column("search_settings_id", sa.Integer(), nullable=True) + ) + + # Populate the new column with data from the existing embedding_model_id + op.execute("UPDATE index_attempt SET search_settings_id = embedding_model_id") + + # Create the foreign key constraint + op.create_foreign_key( + "fk_index_attempt_search_settings", + "index_attempt", + "search_settings", + ["search_settings_id"], + ["id"], + ) + + # Make the new column non-nullable + op.alter_column("index_attempt", "search_settings_id", nullable=False) + + # Drop the old embedding_model_id column + op.drop_column("index_attempt", "embedding_model_id") + + +def downgrade() -> None: + # Add back the embedding_model_id column + op.add_column( + "index_attempt", sa.Column("embedding_model_id", sa.Integer(), nullable=True) + ) + + # Populate the old column with data from search_settings_id + op.execute("UPDATE index_attempt SET embedding_model_id = search_settings_id") + + # Make the old column non-nullable + op.alter_column("index_attempt", "embedding_model_id", nullable=False) + + # Drop the foreign key constraint + op.drop_constraint( + "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey" + ) + + # Drop the new search_settings_id column + op.drop_column("index_attempt", "search_settings_id") + + # Rename the table back + op.rename_table("search_settings", "embedding_model") + + # Remove added columns + op.drop_column("embedding_model", "num_rerank") + op.drop_column("embedding_model", "rerank_api_key") + op.drop_column("embedding_model", "rerank_provider_type") + op.drop_column("embedding_model", "rerank_model_name") + op.drop_column("embedding_model", "disable_rerank_for_streaming") + op.drop_column("embedding_model", "multilingual_expansion") + op.drop_column("embedding_model", "multipass_indexing") + + op.create_foreign_key( + "index_attempt__embedding_model_fk", + "index_attempt", + "embedding_model", + ["embedding_model_id"], + ["id"], + ) diff --git a/backend/alembic/versions/213fd978c6d8_notifications.py b/backend/alembic/versions/213fd978c6d8_notifications.py new file mode 100644 index 00000000000..563556ea50a --- /dev/null +++ b/backend/alembic/versions/213fd978c6d8_notifications.py @@ -0,0 
+1,44 @@ +"""notifications + +Revision ID: 213fd978c6d8 +Revises: 5fc1f54cc252 +Create Date: 2024-08-10 11:13:36.070790 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "213fd978c6d8" +down_revision = "5fc1f54cc252" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "notification", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "notif_type", + sa.String(), + nullable=False, + ), + sa.Column( + "user_id", + sa.UUID(), + nullable=True, + ), + sa.Column("dismissed", sa.Boolean(), nullable=False), + sa.Column("last_shown", sa.DateTime(timezone=True), nullable=False), + sa.Column("first_shown", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade() -> None: + op.drop_table("notification") diff --git a/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py b/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py index 07b5601013b..10d094e0da2 100644 --- a/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py +++ b/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py @@ -79,7 +79,7 @@ def downgrade() -> None: ) op.create_foreign_key( "document_retrieval_feedback__chat_message_fk", - "document_retrieval", + "document_retrieval_feedback", "chat_message", ["chat_message_id"], ["id"], diff --git a/backend/alembic/versions/27c6ecc08586_permission_framework.py b/backend/alembic/versions/27c6ecc08586_permission_framework.py index cd869e2ba6d..ff41d2f5cff 100644 --- a/backend/alembic/versions/27c6ecc08586_permission_framework.py +++ b/backend/alembic/versions/27c6ecc08586_permission_framework.py @@ -160,12 +160,28 @@ def downgrade() -> None: nullable=False, ), ) - op.drop_constraint( - "fk_index_attempt_credential_id", "index_attempt", type_="foreignkey" - ) - op.drop_constraint( - "fk_index_attempt_connector_id", "index_attempt", type_="foreignkey" - ) + + # Check if the constraint exists before dropping + conn = op.get_bind() + inspector = sa.inspect(conn) + constraints = inspector.get_foreign_keys("index_attempt") + + if any( + constraint["name"] == "fk_index_attempt_credential_id" + for constraint in constraints + ): + op.drop_constraint( + "fk_index_attempt_credential_id", "index_attempt", type_="foreignkey" + ) + + if any( + constraint["name"] == "fk_index_attempt_connector_id" + for constraint in constraints + ): + op.drop_constraint( + "fk_index_attempt_connector_id", "index_attempt", type_="foreignkey" + ) + op.drop_column("index_attempt", "credential_id") op.drop_column("index_attempt", "connector_id") op.drop_table("connector_credential_pair") diff --git a/backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py b/backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py new file mode 100644 index 00000000000..cab166531ae --- /dev/null +++ b/backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py @@ -0,0 +1,32 @@ +"""Add Above Below to Persona + +Revision ID: 2d2304e27d8c +Revises: 4b08d97e175a +Create Date: 2024-08-21 19:15:15.762948 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "2d2304e27d8c" +down_revision = "4b08d97e175a" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("persona", sa.Column("chunks_above", sa.Integer(), nullable=True)) + op.add_column("persona", sa.Column("chunks_below", sa.Integer(), nullable=True)) + + op.execute( + "UPDATE persona SET chunks_above = 1, chunks_below = 1 WHERE chunks_above IS NULL AND chunks_below IS NULL" + ) + + op.alter_column("persona", "chunks_above", nullable=False) + op.alter_column("persona", "chunks_below", nullable=False) + + +def downgrade() -> None: + op.drop_column("persona", "chunks_below") + op.drop_column("persona", "chunks_above") diff --git a/backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py b/backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py new file mode 100644 index 00000000000..46beab05f5c --- /dev/null +++ b/backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py @@ -0,0 +1,70 @@ +"""Add icon_color and icon_shape to Persona + +Revision ID: 325975216eb3 +Revises: 91ffac7e65b3 +Create Date: 2024-07-24 21:29:31.784562 + +""" +import random +from alembic import op +import sqlalchemy as sa +from sqlalchemy.sql import table, column, select + +# revision identifiers, used by Alembic. +revision = "325975216eb3" +down_revision = "91ffac7e65b3" +branch_labels: None = None +depends_on: None = None + + +colorOptions = [ + "#FF6FBF", + "#6FB1FF", + "#B76FFF", + "#FFB56F", + "#6FFF8D", + "#FF6F6F", + "#6FFFFF", +] + + +# Function to generate a random shape ensuring at least 3 of the middle 4 squares are filled +def generate_random_shape() -> int: + center_squares = [12, 10, 6, 14, 13, 11, 7, 15] + center_fill = random.choice(center_squares) + remaining_squares = [i for i in range(16) if not (center_fill & (1 << i))] + random.shuffle(remaining_squares) + for i in range(10 - bin(center_fill).count("1")): + center_fill |= 1 << remaining_squares[i] + return center_fill + + +def upgrade() -> None: + op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True)) + op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True)) + op.add_column("persona", sa.Column("uploaded_image_id", sa.String(), nullable=True)) + + persona = table( + "persona", + column("id", sa.Integer), + column("icon_color", sa.String), + column("icon_shape", sa.Integer), + ) + + conn = op.get_bind() + personas = conn.execute(select(persona.c.id)) + + for persona_id in personas: + random_color = random.choice(colorOptions) + random_shape = generate_random_shape() + conn.execute( + persona.update() + .where(persona.c.id == persona_id[0]) + .values(icon_color=random_color, icon_shape=random_shape) + ) + + +def downgrade() -> None: + op.drop_column("persona", "icon_shape") + op.drop_column("persona", "uploaded_image_id") + op.drop_column("persona", "icon_color") diff --git a/backend/alembic/versions/351faebd379d_add_curator_fields.py b/backend/alembic/versions/351faebd379d_add_curator_fields.py new file mode 100644 index 00000000000..b3254d26c16 --- /dev/null +++ b/backend/alembic/versions/351faebd379d_add_curator_fields.py @@ -0,0 +1,90 @@ +"""Add curator fields + +Revision ID: 351faebd379d +Revises: ee3f4b47fad5 +Create Date: 2024-08-15 22:37:08.397052 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "351faebd379d" +down_revision = "ee3f4b47fad5" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Add is_curator column to User__UserGroup table + op.add_column( + "user__user_group", + sa.Column("is_curator", sa.Boolean(), nullable=False, server_default="false"), + ) + + # Use batch mode to modify the enum type + with op.batch_alter_table("user", schema=None) as batch_op: + batch_op.alter_column( # type: ignore[attr-defined] + "role", + type_=sa.Enum( + "BASIC", + "ADMIN", + "CURATOR", + "GLOBAL_CURATOR", + name="userrole", + native_enum=False, + ), + existing_type=sa.Enum("BASIC", "ADMIN", name="userrole", native_enum=False), + existing_nullable=False, + ) + # Create the association table + op.create_table( + "credential__user_group", + sa.Column("credential_id", sa.Integer(), nullable=False), + sa.Column("user_group_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["credential_id"], + ["credential.id"], + ), + sa.ForeignKeyConstraint( + ["user_group_id"], + ["user_group.id"], + ), + sa.PrimaryKeyConstraint("credential_id", "user_group_id"), + ) + op.add_column( + "credential", + sa.Column( + "curator_public", sa.Boolean(), nullable=False, server_default="false" + ), + ) + + +def downgrade() -> None: + # Update existing records to ensure they fit within the BASIC/ADMIN roles + op.execute( + "UPDATE \"user\" SET role = 'ADMIN' WHERE role IN ('CURATOR', 'GLOBAL_CURATOR')" + ) + + # Remove is_curator column from User__UserGroup table + op.drop_column("user__user_group", "is_curator") + + with op.batch_alter_table("user", schema=None) as batch_op: + batch_op.alter_column( # type: ignore[attr-defined] + "role", + type_=sa.Enum( + "BASIC", "ADMIN", name="userrole", native_enum=False, length=20 + ), + existing_type=sa.Enum( + "BASIC", + "ADMIN", + "CURATOR", + "GLOBAL_CURATOR", + name="userrole", + native_enum=False, + ), + existing_nullable=False, + ) + # Drop the association table + op.drop_table("credential__user_group") + op.drop_column("credential", "curator_public") diff --git a/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py b/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py index 5e50c02c8bf..bfde0162ba2 100644 --- a/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py +++ b/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py @@ -18,7 +18,6 @@ def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### op.add_column( "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True) ) @@ -29,10 +28,8 @@ def upgrade() -> None: ["alternate_assistant_id"], ["id"], ) - # ### end Alembic commands ### def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### op.drop_constraint("fk_chat_message_persona", "chat_message", type_="foreignkey") op.drop_column("chat_message", "alternate_assistant_id") diff --git a/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py b/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py new file mode 100644 index 00000000000..6aa2ffca0a6 --- /dev/null +++ b/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py @@ -0,0 +1,42 @@ +"""Rename index_origin to index_recursively + +Revision ID: 1d6ad76d1f37 +Revises: e1392f05e840 +Create Date: 2024-08-01 12:38:54.466081 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = "1d6ad76d1f37" +down_revision = "e1392f05e840" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute( + """ + UPDATE connector + SET connector_specific_config = jsonb_set( + connector_specific_config, + '{index_recursively}', + 'true'::jsonb + ) - 'index_origin' + WHERE connector_specific_config ? 'index_origin' + """ + ) + + +def downgrade() -> None: + op.execute( + """ + UPDATE connector + SET connector_specific_config = jsonb_set( + connector_specific_config, + '{index_origin}', + connector_specific_config->'index_recursively' + ) - 'index_recursively' + WHERE connector_specific_config ? 'index_recursively' + """ + ) diff --git a/backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py b/backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py new file mode 100644 index 00000000000..2d0e1a32f98 --- /dev/null +++ b/backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py @@ -0,0 +1,65 @@ +"""add cloud embedding model and update embedding_model + +Revision ID: 44f856ae2a4a +Revises: d716b0791ddd +Create Date: 2024-06-28 20:01:05.927647 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "44f856ae2a4a" +down_revision = "d716b0791ddd" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Create embedding_provider table + op.create_table( + "embedding_provider", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("api_key", sa.LargeBinary(), nullable=True), + sa.Column("default_model_id", sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("name"), + ) + + # Add cloud_provider_id to embedding_model table + op.add_column( + "embedding_model", sa.Column("cloud_provider_id", sa.Integer(), nullable=True) + ) + + # Add foreign key constraints + op.create_foreign_key( + "fk_embedding_model_cloud_provider", + "embedding_model", + "embedding_provider", + ["cloud_provider_id"], + ["id"], + ) + op.create_foreign_key( + "fk_embedding_provider_default_model", + "embedding_provider", + "embedding_model", + ["default_model_id"], + ["id"], + ) + + +def downgrade() -> None: + # Remove foreign key constraints + op.drop_constraint( + "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey" + ) + op.drop_constraint( + "fk_embedding_provider_default_model", "embedding_provider", type_="foreignkey" + ) + + # Remove cloud_provider_id column + op.drop_column("embedding_model", "cloud_provider_id") + + # Drop embedding_provider table + op.drop_table("embedding_provider") diff --git a/backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py b/backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py new file mode 100644 index 00000000000..2e3f377e372 --- /dev/null +++ b/backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py @@ -0,0 +1,49 @@ +"""Add display_model_names to llm_provider + +Revision ID: 473a1a7ca408 +Revises: 325975216eb3 +Create Date: 2024-07-25 14:31:02.002917 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "473a1a7ca408" +down_revision = "325975216eb3" +branch_labels: None = None +depends_on: None = None + +default_models_by_provider = { + "openai": ["gpt-4", "gpt-4o", "gpt-4o-mini"], + "bedrock": [ + "meta.llama3-1-70b-instruct-v1:0", + "meta.llama3-1-8b-instruct-v1:0", + "anthropic.claude-3-opus-20240229-v1:0", + "mistral.mistral-large-2402-v1:0", + "anthropic.claude-3-5-sonnet-20240620-v1:0", + ], + "anthropic": ["claude-3-opus-20240229", "claude-3-5-sonnet-20240620"], +} + + +def upgrade() -> None: + op.add_column( + "llm_provider", + sa.Column("display_model_names", postgresql.ARRAY(sa.String()), nullable=True), + ) + + connection = op.get_bind() + for provider, models in default_models_by_provider.items(): + connection.execute( + sa.text( + "UPDATE llm_provider SET display_model_names = :models WHERE provider = :provider" + ), + {"models": models, "provider": provider}, + ) + + +def downgrade() -> None: + op.drop_column("llm_provider", "display_model_names") diff --git a/backend/alembic/versions/4a951134c801_moved_status_to_connector_credential_.py b/backend/alembic/versions/4a951134c801_moved_status_to_connector_credential_.py new file mode 100644 index 00000000000..3deebaecd39 --- /dev/null +++ b/backend/alembic/versions/4a951134c801_moved_status_to_connector_credential_.py @@ -0,0 +1,80 @@ +"""Moved status to connector credential pair + +Revision ID: 4a951134c801 +Revises: 7477a5f5d728 +Create Date: 2024-08-10 19:20:34.527559 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "4a951134c801" +down_revision = "7477a5f5d728" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "connector_credential_pair", + sa.Column( + "status", + sa.Enum( + "ACTIVE", + "PAUSED", + "DELETING", + name="connectorcredentialpairstatus", + native_enum=False, + ), + nullable=True, + ), + ) + + # Update status of connector_credential_pair based on connector's disabled status + op.execute( + """ + UPDATE connector_credential_pair + SET status = CASE + WHEN ( + SELECT disabled + FROM connector + WHERE connector.id = connector_credential_pair.connector_id + ) = FALSE THEN 'ACTIVE' + ELSE 'PAUSED' + END + """ + ) + + # Make the status column not nullable after setting values + op.alter_column("connector_credential_pair", "status", nullable=False) + + op.drop_column("connector", "disabled") + + +def downgrade() -> None: + op.add_column( + "connector", + sa.Column("disabled", sa.BOOLEAN(), autoincrement=False, nullable=True), + ) + + # Update disabled status of connector based on connector_credential_pair's status + op.execute( + """ + UPDATE connector + SET disabled = CASE + WHEN EXISTS ( + SELECT 1 + FROM connector_credential_pair + WHERE connector_credential_pair.connector_id = connector.id + AND connector_credential_pair.status = 'ACTIVE' + ) THEN FALSE + ELSE TRUE + END + """ + ) + + # Make the disabled column not nullable after setting values + op.alter_column("connector", "disabled", nullable=False) + + op.drop_column("connector_credential_pair", "status") diff --git a/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py b/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py new file mode 100644 index 00000000000..29316adb1df --- /dev/null +++ b/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py @@ -0,0 +1,34 @@ +"""change default prune_freq + +Revision ID: 4b08d97e175a +Revises: d9ec13955951 +Create Date: 2024-08-20 15:28:52.993827 + +""" +from 
alembic import op + +# revision identifiers, used by Alembic. +revision = "4b08d97e175a" +down_revision = "d9ec13955951" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute( + """ + UPDATE connector + SET prune_freq = 2592000 + WHERE prune_freq = 86400 + """ + ) + + +def downgrade() -> None: + op.execute( + """ + UPDATE connector + SET prune_freq = 86400 + WHERE prune_freq = 2592000 + """ + ) diff --git a/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py b/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py new file mode 100644 index 00000000000..8077b24b095 --- /dev/null +++ b/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py @@ -0,0 +1,72 @@ +"""Add type to credentials + +Revision ID: 4ea2c93919c1 +Revises: 473a1a7ca408 +Create Date: 2024-07-18 13:07:13.655895 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "4ea2c93919c1" +down_revision = "473a1a7ca408" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Add the new 'source' column to the 'credential' table + op.add_column( + "credential", + sa.Column( + "source", + sa.String(length=100), # Use String instead of Enum + nullable=True, # Initially allow NULL values + ), + ) + op.add_column( + "credential", + sa.Column( + "name", + sa.String(), + nullable=True, + ), + ) + + # Create a temporary table that maps each credential to a single connector source. + # This is needed because a credential can be associated with multiple connectors, + # but we want to assign a single source to each credential. + # We use DISTINCT ON to ensure we only get one row per credential_id. + op.execute( + """ + CREATE TEMPORARY TABLE temp_connector_credential AS + SELECT DISTINCT ON (cc.credential_id) + cc.credential_id, + c.source AS connector_source + FROM connector_credential_pair cc + JOIN connector c ON cc.connector_id = c.id + """ + ) + + # Update the 'source' column in the 'credential' table + op.execute( + """ + UPDATE credential cred + SET source = COALESCE( + (SELECT connector_source + FROM temp_connector_credential temp + WHERE cred.id = temp.credential_id), + 'NOT_APPLICABLE' + ) + """ + ) + # If no exception was raised, alter the column + op.alter_column("credential", "source", nullable=True) # TODO modify + # # ### end Alembic commands ### + + +def downgrade() -> None: + op.drop_column("credential", "source") + op.drop_column("credential", "name") diff --git a/backend/alembic/versions/5fc1f54cc252_hybrid_enum.py b/backend/alembic/versions/5fc1f54cc252_hybrid_enum.py new file mode 100644 index 00000000000..63b1e7875a9 --- /dev/null +++ b/backend/alembic/versions/5fc1f54cc252_hybrid_enum.py @@ -0,0 +1,25 @@ +"""hybrid-enum + +Revision ID: 5fc1f54cc252 +Revises: 1d6ad76d1f37 +Create Date: 2024-08-06 15:35:40.278485 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "5fc1f54cc252" +down_revision = "1d6ad76d1f37" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.drop_column("persona", "search_type") + + +def downgrade() -> None: + op.add_column("persona", sa.Column("search_type", sa.String(), nullable=True)) + op.execute("UPDATE persona SET search_type = 'SEMANTIC'") + op.alter_column("persona", "search_type", nullable=False) diff --git a/backend/alembic/versions/7477a5f5d728_added_model_defaults_for_users.py b/backend/alembic/versions/7477a5f5d728_added_model_defaults_for_users.py new file mode 100644 index 00000000000..6efb9840526 --- /dev/null +++ b/backend/alembic/versions/7477a5f5d728_added_model_defaults_for_users.py @@ -0,0 +1,24 @@ +"""Added model defaults for users + +Revision ID: 7477a5f5d728 +Revises: 213fd978c6d8 +Create Date: 2024-08-04 19:00:04.512634 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "7477a5f5d728" +down_revision = "213fd978c6d8" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("user", sa.Column("default_model", sa.Text(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("user", "default_model") diff --git a/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py b/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py index c609ca4ae04..1dfbb9365d8 100644 --- a/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py +++ b/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py @@ -28,5 +28,9 @@ def upgrade() -> None: def downgrade() -> None: - # This wasn't really required by the code either, no good reason to make it unique again - pass + op.create_unique_constraint( + "connector_credential_pair__name__key", "connector_credential_pair", ["name"] + ) + op.alter_column( + "connector_credential_pair", "name", existing_type=sa.String(), nullable=True + ) diff --git a/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py index 6fb8c5ac0e1..c2ba10b3875 100644 --- a/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py +++ b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py @@ -10,7 +10,7 @@ from danswer.db.models import IndexModelStatus from danswer.search.enums import RecencyBiasSetting -from danswer.search.models import SearchType +from danswer.search.enums import SearchType # revision identifiers, used by Alembic. 
revision = "776b3bbe9092" diff --git a/backend/alembic/versions/795b20b85b4b_add_llm_group_permissions_control.py b/backend/alembic/versions/795b20b85b4b_add_llm_group_permissions_control.py new file mode 100644 index 00000000000..8b7fb9a2b8d --- /dev/null +++ b/backend/alembic/versions/795b20b85b4b_add_llm_group_permissions_control.py @@ -0,0 +1,41 @@ +"""add_llm_group_permissions_control + +Revision ID: 795b20b85b4b +Revises: 05c07bf07c00 +Create Date: 2024-07-19 11:54:35.701558 + +""" +from alembic import op +import sqlalchemy as sa + + +revision = "795b20b85b4b" +down_revision = "05c07bf07c00" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "llm_provider__user_group", + sa.Column("llm_provider_id", sa.Integer(), nullable=False), + sa.Column("user_group_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["llm_provider_id"], + ["llm_provider.id"], + ), + sa.ForeignKeyConstraint( + ["user_group_id"], + ["user_group.id"], + ), + sa.PrimaryKeyConstraint("llm_provider_id", "user_group_id"), + ) + op.add_column( + "llm_provider", + sa.Column("is_public", sa.Boolean(), nullable=False, server_default="true"), + ) + + +def downgrade() -> None: + op.drop_table("llm_provider__user_group") + op.drop_column("llm_provider", "is_public") diff --git a/backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py b/backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py index a07b94bd925..b41e18f856c 100644 --- a/backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py +++ b/backend/alembic/versions/7aea705850d5_added_slack_auto_filter.py @@ -10,8 +10,8 @@ revision = "7aea705850d5" down_revision = "4505fd7302e1" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/8a87bd6ec550_associate_index_attempts_with_ccpair.py b/backend/alembic/versions/8a87bd6ec550_associate_index_attempts_with_ccpair.py new file mode 100644 index 00000000000..166c4b7ba18 --- /dev/null +++ b/backend/alembic/versions/8a87bd6ec550_associate_index_attempts_with_ccpair.py @@ -0,0 +1,107 @@ +"""associate index attempts with ccpair + +Revision ID: 8a87bd6ec550 +Revises: 4ea2c93919c1 +Create Date: 2024-07-22 15:15:52.558451 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "8a87bd6ec550" +down_revision = "4ea2c93919c1" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Add the new connector_credential_pair_id column + op.add_column( + "index_attempt", + sa.Column("connector_credential_pair_id", sa.Integer(), nullable=True), + ) + + # Create a foreign key constraint to the connector_credential_pair table + op.create_foreign_key( + "fk_index_attempt_connector_credential_pair_id", + "index_attempt", + "connector_credential_pair", + ["connector_credential_pair_id"], + ["id"], + ) + + # Populate the new connector_credential_pair_id column using existing connector_id and credential_id + op.execute( + """ + UPDATE index_attempt ia + SET connector_credential_pair_id = ( + SELECT id FROM connector_credential_pair ccp + WHERE + (ia.connector_id IS NULL OR ccp.connector_id = ia.connector_id) + AND (ia.credential_id IS NULL OR ccp.credential_id = ia.credential_id) + LIMIT 1 + ) + WHERE ia.connector_id IS NOT NULL OR ia.credential_id IS NOT NULL + """ + ) + + # For good measure + op.execute( + """ + DELETE FROM index_attempt + WHERE connector_credential_pair_id IS NULL + """ + ) + + # Make the new connector_credential_pair_id column non-nullable + op.alter_column("index_attempt", "connector_credential_pair_id", nullable=False) + + # Drop the old connector_id and credential_id columns + op.drop_column("index_attempt", "connector_id") + op.drop_column("index_attempt", "credential_id") + + # Update the index to use connector_credential_pair_id + op.create_index( + "ix_index_attempt_latest_for_connector_credential_pair", + "index_attempt", + ["connector_credential_pair_id", "time_created"], + ) + + +def downgrade() -> None: + # Add back the old connector_id and credential_id columns + op.add_column( + "index_attempt", sa.Column("connector_id", sa.Integer(), nullable=True) + ) + op.add_column( + "index_attempt", sa.Column("credential_id", sa.Integer(), nullable=True) + ) + + # Populate the old connector_id and credential_id columns using the connector_credential_pair_id + op.execute( + """ + UPDATE index_attempt ia + SET connector_id = ccp.connector_id, credential_id = ccp.credential_id + FROM connector_credential_pair ccp + WHERE ia.connector_credential_pair_id = ccp.id + """ + ) + + # Make the old connector_id and credential_id columns non-nullable + op.alter_column("index_attempt", "connector_id", nullable=False) + op.alter_column("index_attempt", "credential_id", nullable=False) + + # Drop the new connector_credential_pair_id column + op.drop_constraint( + "fk_index_attempt_connector_credential_pair_id", + "index_attempt", + type_="foreignkey", + ) + op.drop_column("index_attempt", "connector_credential_pair_id") + + op.create_index( + "ix_index_attempt_latest_for_connector_credential_pair", + "index_attempt", + ["connector_id", "credential_id", "time_created"], + ) diff --git a/backend/alembic/versions/91ffac7e65b3_add_expiry_time.py b/backend/alembic/versions/91ffac7e65b3_add_expiry_time.py new file mode 100644 index 00000000000..7c029b3c9cf --- /dev/null +++ b/backend/alembic/versions/91ffac7e65b3_add_expiry_time.py @@ -0,0 +1,26 @@ +"""add expiry time + +Revision ID: 91ffac7e65b3 +Revises: bc9771dccadf +Create Date: 2024-06-24 09:39:56.462242 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "91ffac7e65b3" +down_revision = "795b20b85b4b" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "user", sa.Column("oidc_expiry", sa.DateTime(timezone=True), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("user", "oidc_expiry") diff --git a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py index cdf1722b3a5..a6938e365c6 100644 --- a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py +++ b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py @@ -16,7 +16,6 @@ def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### op.alter_column( "connector_credential_pair", "last_attempt_status", @@ -29,11 +28,9 @@ def upgrade() -> None: ), nullable=True, ) - # ### end Alembic commands ### def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### op.alter_column( "connector_credential_pair", "last_attempt_status", @@ -46,4 +43,3 @@ def downgrade() -> None: ), nullable=False, ) - # ### end Alembic commands ### diff --git a/backend/alembic/versions/b896bbd0d5a7_backfill_is_internet_data_to_false.py b/backend/alembic/versions/b896bbd0d5a7_backfill_is_internet_data_to_false.py new file mode 100644 index 00000000000..9deac574b28 --- /dev/null +++ b/backend/alembic/versions/b896bbd0d5a7_backfill_is_internet_data_to_false.py @@ -0,0 +1,23 @@ +"""backfill is_internet data to False + +Revision ID: b896bbd0d5a7 +Revises: 44f856ae2a4a +Create Date: 2024-07-16 15:21:05.718571 + +""" +from alembic import op + + +# revision identifiers, used by Alembic. +revision = "b896bbd0d5a7" +down_revision = "44f856ae2a4a" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute("UPDATE search_doc SET is_internet = FALSE WHERE is_internet IS NULL") + + +def downgrade() -> None: + pass diff --git a/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py new file mode 100644 index 00000000000..e4808042aae --- /dev/null +++ b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py @@ -0,0 +1,57 @@ +"""Add index_attempt_errors table + +Revision ID: c5b692fa265c +Revises: 4a951134c801 +Create Date: 2024-08-08 14:06:39.581972 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "c5b692fa265c" +down_revision = "4a951134c801" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "index_attempt_errors", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("index_attempt_id", sa.Integer(), nullable=True), + sa.Column("batch", sa.Integer(), nullable=True), + sa.Column( + "doc_summaries", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + ), + sa.Column("error_msg", sa.Text(), nullable=True), + sa.Column("traceback", sa.Text(), nullable=True), + sa.Column( + "time_created", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["index_attempt_id"], + ["index_attempt.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "index_attempt_id", + "index_attempt_errors", + ["time_created"], + unique=False, + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("index_attempt_id", table_name="index_attempt_errors") + op.drop_table("index_attempt_errors") + # ### end Alembic commands ### diff --git a/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py b/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py index aa4e7c71c1e..5ef63ed331c 100644 --- a/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py +++ b/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py @@ -19,6 +19,9 @@ def upgrade() -> None: op.drop_table("deletion_attempt") + # Remove the DeletionStatus enum + op.execute("DROP TYPE IF EXISTS deletionstatus;") + def downgrade() -> None: op.create_table( diff --git a/backend/alembic/versions/d716b0791ddd_combined_slack_id_fields.py b/backend/alembic/versions/d716b0791ddd_combined_slack_id_fields.py new file mode 100644 index 00000000000..6510d8b39da --- /dev/null +++ b/backend/alembic/versions/d716b0791ddd_combined_slack_id_fields.py @@ -0,0 +1,45 @@ +"""combined slack id fields + +Revision ID: d716b0791ddd +Revises: 7aea705850d5 +Create Date: 2024-07-10 17:57:45.630550 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = "d716b0791ddd" +down_revision = "7aea705850d5" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute( + """ + UPDATE slack_bot_config + SET channel_config = jsonb_set( + channel_config, + '{respond_member_group_list}', + coalesce(channel_config->'respond_team_member_list', '[]'::jsonb) || + coalesce(channel_config->'respond_slack_group_list', '[]'::jsonb) + ) - 'respond_team_member_list' - 'respond_slack_group_list' + """ + ) + + +def downgrade() -> None: + op.execute( + """ + UPDATE slack_bot_config + SET channel_config = jsonb_set( + jsonb_set( + channel_config - 'respond_member_group_list', + '{respond_team_member_list}', + '[]'::jsonb + ), + '{respond_slack_group_list}', + '[]'::jsonb + ) + """ + ) diff --git a/backend/alembic/versions/d9ec13955951_remove__dim_suffix_from_model_name.py b/backend/alembic/versions/d9ec13955951_remove__dim_suffix_from_model_name.py new file mode 100644 index 00000000000..0e84d5fe85a --- /dev/null +++ b/backend/alembic/versions/d9ec13955951_remove__dim_suffix_from_model_name.py @@ -0,0 +1,31 @@ +"""Remove _alt suffix from model_name + +Revision ID: d9ec13955951 +Revises: da4c21c69164 +Create Date: 2024-08-20 16:31:32.955686 + +""" + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision = "d9ec13955951" +down_revision = "da4c21c69164" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute( + """ + UPDATE embedding_model + SET model_name = regexp_replace(model_name, '__danswer_alt_index$', '') + WHERE model_name LIKE '%__danswer_alt_index' + """ + ) + + +def downgrade() -> None: + # We can't reliably add the __danswer_alt_index suffix back, so we'll leave this empty + pass diff --git a/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py new file mode 100644 index 00000000000..95b53cbeb41 --- /dev/null +++ b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py @@ -0,0 +1,65 @@ +"""chosen_assistants changed to jsonb + +Revision ID: da4c21c69164 +Revises: c5b692fa265c +Create Date: 2024-08-18 19:06:47.291491 + +""" +import json +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "da4c21c69164" +down_revision = "c5b692fa265c" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + conn = op.get_bind() + existing_ids_and_chosen_assistants = conn.execute( + sa.text("select id, chosen_assistants from public.user") + ) + op.drop_column( + "user", + "chosen_assistants", + ) + op.add_column( + "user", + sa.Column( + "chosen_assistants", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + for id, chosen_assistants in existing_ids_and_chosen_assistants: + conn.execute( + sa.text( + "update public.user set chosen_assistants = :chosen_assistants where id = :id" + ), + {"chosen_assistants": json.dumps(chosen_assistants), "id": id}, + ) + + +def downgrade() -> None: + conn = op.get_bind() + existing_ids_and_chosen_assistants = conn.execute( + sa.text("select id, chosen_assistants from public.user") + ) + op.drop_column( + "user", + "chosen_assistants", + ) + op.add_column( + "user", + sa.Column("chosen_assistants", postgresql.ARRAY(sa.Integer()), nullable=True), + ) + for id, chosen_assistants in existing_ids_and_chosen_assistants: + conn.execute( + sa.text( + "update public.user set chosen_assistants = :chosen_assistants where id = :id" + ), + {"chosen_assistants": chosen_assistants, "id": id}, + ) diff --git a/backend/alembic/versions/dbaa756c2ccf_embedding_models.py b/backend/alembic/versions/dbaa756c2ccf_embedding_models.py index a7c9b8f5aee..6274b3e1334 100644 --- a/backend/alembic/versions/dbaa756c2ccf_embedding_models.py +++ b/backend/alembic/versions/dbaa756c2ccf_embedding_models.py @@ -9,7 +9,7 @@ import sqlalchemy as sa from sqlalchemy import table, column, String, Integer, Boolean -from danswer.db.embedding_model import ( +from danswer.db.search_settings import ( get_new_default_embedding_model, get_old_default_embedding_model, user_has_overridden_embedding_model, @@ -71,14 +71,14 @@ def upgrade() -> None: "query_prefix": old_embedding_model.query_prefix, "passage_prefix": old_embedding_model.passage_prefix, "index_name": old_embedding_model.index_name, - "status": old_embedding_model.status, + "status": IndexModelStatus.PRESENT, } ], ) # if the user has not overridden the default embedding model via env variables, # insert the new default model into the database to auto-upgrade them if not user_has_overridden_embedding_model(): - new_embedding_model = get_new_default_embedding_model(is_present=False) + new_embedding_model = get_new_default_embedding_model() 
op.bulk_insert( EmbeddingModel, [ @@ -136,4 +136,4 @@ def downgrade() -> None: ) op.drop_column("index_attempt", "embedding_model_id") op.drop_table("embedding_model") - op.execute("DROP TYPE indexmodelstatus;") + op.execute("DROP TYPE IF EXISTS indexmodelstatus;") diff --git a/backend/alembic/versions/e1392f05e840_added_input_prompts.py b/backend/alembic/versions/e1392f05e840_added_input_prompts.py new file mode 100644 index 00000000000..dd358220f7e --- /dev/null +++ b/backend/alembic/versions/e1392f05e840_added_input_prompts.py @@ -0,0 +1,58 @@ +"""Added input prompts + +Revision ID: e1392f05e840 +Revises: 08a1eda20fe1 +Create Date: 2024-07-13 19:09:22.556224 + +""" + +import fastapi_users_db_sqlalchemy + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "e1392f05e840" +down_revision = "08a1eda20fe1" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "inputprompt", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("prompt", sa.String(), nullable=False), + sa.Column("content", sa.String(), nullable=False), + sa.Column("active", sa.Boolean(), nullable=False), + sa.Column("is_public", sa.Boolean(), nullable=False), + sa.Column( + "user_id", + fastapi_users_db_sqlalchemy.generics.GUID(), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "inputprompt__user", + sa.Column("input_prompt_id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["input_prompt_id"], + ["inputprompt.id"], + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["inputprompt.id"], + ), + sa.PrimaryKeyConstraint("input_prompt_id", "user_id"), + ) + + +def downgrade() -> None: + op.drop_table("inputprompt__user") + op.drop_table("inputprompt") diff --git a/backend/alembic/versions/ee3f4b47fad5_added_alternate_model_to_chat_message.py b/backend/alembic/versions/ee3f4b47fad5_added_alternate_model_to_chat_message.py new file mode 100644 index 00000000000..64ffdad25f7 --- /dev/null +++ b/backend/alembic/versions/ee3f4b47fad5_added_alternate_model_to_chat_message.py @@ -0,0 +1,28 @@ +"""Added alternate model to chat message + +Revision ID: ee3f4b47fad5 +Revises: 2d2304e27d8c +Create Date: 2024-08-12 00:11:50.915845 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "ee3f4b47fad5" +down_revision = "2d2304e27d8c" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_message", + sa.Column("overridden_model", sa.String(length=255), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("chat_message", "overridden_model") diff --git a/backend/alembic/versions/f17bf3b0d9f1_embedding_provider_by_provider_type.py b/backend/alembic/versions/f17bf3b0d9f1_embedding_provider_by_provider_type.py new file mode 100644 index 00000000000..a141f946d2a --- /dev/null +++ b/backend/alembic/versions/f17bf3b0d9f1_embedding_provider_by_provider_type.py @@ -0,0 +1,172 @@ +"""embedding provider by provider type + +Revision ID: f17bf3b0d9f1 +Revises: 351faebd379d +Create Date: 2024-08-21 13:13:31.120460 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
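# The migration below re-keys embedding_provider from a surrogate integer id to the
# provider_type string and repoints embedding_model at the new key. Schematically (column
# lists abbreviated; unrelated columns are unchanged):
#
#   before: embedding_provider(id PK, name, default_model_id, ...)
#           embedding_model(..., cloud_provider_id -> embedding_provider.id)
#   after:  embedding_provider(provider_type PK, ...)
#           embedding_model(..., provider_type -> embedding_provider.provider_type)
#
# Note that the downgrade can only restore a best-effort name (OpenAI, Cohere, Google,
# Voyage) and freshly generated ids, since the original surrogate ids are dropped here.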
+revision = "f17bf3b0d9f1" +down_revision = "351faebd379d" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Add provider_type column to embedding_provider + op.add_column( + "embedding_provider", + sa.Column("provider_type", sa.String(50), nullable=True), + ) + + # Update provider_type with existing name values + op.execute("UPDATE embedding_provider SET provider_type = UPPER(name)") + + # Make provider_type not nullable + op.alter_column("embedding_provider", "provider_type", nullable=False) + + # Drop the foreign key constraint in embedding_model table + op.drop_constraint( + "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey" + ) + + # Drop the existing primary key constraint + op.drop_constraint("embedding_provider_pkey", "embedding_provider", type_="primary") + + # Create a new primary key constraint on provider_type + op.create_primary_key( + "embedding_provider_pkey", "embedding_provider", ["provider_type"] + ) + + # Add provider_type column to embedding_model + op.add_column( + "embedding_model", + sa.Column("provider_type", sa.String(50), nullable=True), + ) + + # Update provider_type for existing embedding models + op.execute( + """ + UPDATE embedding_model + SET provider_type = ( + SELECT provider_type + FROM embedding_provider + WHERE embedding_provider.id = embedding_model.cloud_provider_id + ) + """ + ) + + # Drop the old id column from embedding_provider + op.drop_column("embedding_provider", "id") + + # Drop the name column from embedding_provider + op.drop_column("embedding_provider", "name") + + # Drop the default_model_id column from embedding_provider + op.drop_column("embedding_provider", "default_model_id") + + # Drop the old cloud_provider_id column from embedding_model + op.drop_column("embedding_model", "cloud_provider_id") + + # Create the new foreign key constraint + op.create_foreign_key( + "fk_embedding_model_cloud_provider", + "embedding_model", + "embedding_provider", + ["provider_type"], + ["provider_type"], + ) + + +def downgrade() -> None: + # Drop the foreign key constraint in embedding_model table + op.drop_constraint( + "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey" + ) + + # Add back the cloud_provider_id column to embedding_model + op.add_column( + "embedding_model", sa.Column("cloud_provider_id", sa.Integer(), nullable=True) + ) + op.add_column("embedding_provider", sa.Column("id", sa.Integer(), nullable=True)) + + # Assign incrementing IDs to embedding providers + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS embedding_provider_id_seq;""" + ) + op.execute( + """ + UPDATE embedding_provider SET id = nextval('embedding_provider_id_seq'); + """ + ) + + # Update cloud_provider_id based on provider_type + op.execute( + """ + UPDATE embedding_model + SET cloud_provider_id = CASE + WHEN provider_type IS NULL THEN NULL + ELSE ( + SELECT id + FROM embedding_provider + WHERE embedding_provider.provider_type = embedding_model.provider_type + ) + END + """ + ) + + # Drop the provider_type column from embedding_model + op.drop_column("embedding_model", "provider_type") + + # Add back the columns to embedding_provider + op.add_column("embedding_provider", sa.Column("name", sa.String(50), nullable=True)) + op.add_column( + "embedding_provider", sa.Column("default_model_id", sa.Integer(), nullable=True) + ) + + # Drop the existing primary key constraint on provider_type + op.drop_constraint("embedding_provider_pkey", "embedding_provider", type_="primary") + + # Create the 
original primary key constraint on id + op.create_primary_key("embedding_provider_pkey", "embedding_provider", ["id"]) + + # Update name with existing provider_type values + op.execute( + """ + UPDATE embedding_provider + SET name = CASE + WHEN provider_type = 'OPENAI' THEN 'OpenAI' + WHEN provider_type = 'COHERE' THEN 'Cohere' + WHEN provider_type = 'GOOGLE' THEN 'Google' + WHEN provider_type = 'VOYAGE' THEN 'Voyage' + ELSE provider_type + END + """ + ) + + # Drop the provider_type column from embedding_provider + op.drop_column("embedding_provider", "provider_type") + + # Recreate the foreign key constraint in embedding_model table + op.create_foreign_key( + "fk_embedding_model_cloud_provider", + "embedding_model", + "embedding_provider", + ["cloud_provider_id"], + ["id"], + ) + + # Recreate the foreign key constraint in embedding_model table + op.create_foreign_key( + "fk_embedding_provider_default_model", + "embedding_provider", + "embedding_model", + ["default_model_id"], + ["id"], + ) diff --git a/backend/danswer/access/access.py b/backend/danswer/access/access.py index 51f5a300c49..5501980ab48 100644 --- a/backend/danswer/access/access.py +++ b/backend/danswer/access/access.py @@ -5,19 +5,16 @@ from danswer.configs.constants import PUBLIC_DOC_PAT from danswer.db.document import get_acccess_info_for_documents from danswer.db.models import User -from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.utils.variable_functionality import fetch_versioned_implementation def _get_access_for_documents( document_ids: list[str], db_session: Session, - cc_pair_to_delete: ConnectorCredentialPairIdentifier | None = None, ) -> dict[str, DocumentAccess]: document_access_info = get_acccess_info_for_documents( db_session=db_session, document_ids=document_ids, - cc_pair_to_delete=cc_pair_to_delete, ) return { document_id: DocumentAccess.build(user_ids, [], is_public) @@ -28,14 +25,13 @@ def _get_access_for_documents( def get_access_for_documents( document_ids: list[str], db_session: Session, - cc_pair_to_delete: ConnectorCredentialPairIdentifier | None = None, ) -> dict[str, DocumentAccess]: """Fetches all access information for the given documents.""" versioned_get_access_for_documents_fn = fetch_versioned_implementation( "danswer.access.access", "_get_access_for_documents" ) return versioned_get_access_for_documents_fn( - document_ids, db_session, cc_pair_to_delete + document_ids, db_session ) # type: ignore diff --git a/backend/danswer/auth/invited_users.py b/backend/danswer/auth/invited_users.py index 56a02fc60c4..efce858f265 100644 --- a/backend/danswer/auth/invited_users.py +++ b/backend/danswer/auth/invited_users.py @@ -1,21 +1,20 @@ from typing import cast +from danswer.configs.constants import KV_USER_STORE_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import JSON_ro -USER_STORE_KEY = "INVITED_USERS" - def get_invited_users() -> list[str]: try: store = get_dynamic_config_store() - return cast(list, store.load(USER_STORE_KEY)) + return cast(list, store.load(KV_USER_STORE_KEY)) except ConfigNotFoundError: return list() def write_invited_users(emails: list[str]) -> int: store = get_dynamic_config_store() - store.store(USER_STORE_KEY, cast(JSON_ro, emails)) + store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails)) return len(emails) diff --git a/backend/danswer/auth/noauth_user.py b/backend/danswer/auth/noauth_user.py index 
4744c4a6488..9520ef41c23 100644 --- a/backend/danswer/auth/noauth_user.py +++ b/backend/danswer/auth/noauth_user.py @@ -3,29 +3,27 @@ from typing import cast from danswer.auth.schemas import UserRole +from danswer.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY from danswer.dynamic_configs.store import ConfigNotFoundError from danswer.dynamic_configs.store import DynamicConfigStore from danswer.server.manage.models import UserInfo from danswer.server.manage.models import UserPreferences -NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" - - def set_no_auth_user_preferences( store: DynamicConfigStore, preferences: UserPreferences ) -> None: - store.store(NO_AUTH_USER_PREFERENCES_KEY, preferences.dict()) + store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump()) def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences: try: preferences_data = cast( - Mapping[str, Any], store.load(NO_AUTH_USER_PREFERENCES_KEY) + Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY) ) return UserPreferences(**preferences_data) except ConfigNotFoundError: - return UserPreferences(chosen_assistants=None) + return UserPreferences(chosen_assistants=None, default_model=None) def fetch_no_auth_user(store: DynamicConfigStore) -> UserInfo: diff --git a/backend/danswer/auth/schemas.py b/backend/danswer/auth/schemas.py index 79d9a7f8098..9e0553991cc 100644 --- a/backend/danswer/auth/schemas.py +++ b/backend/danswer/auth/schemas.py @@ -5,8 +5,20 @@ class UserRole(str, Enum): + """ + User roles + - Basic can't perform any admin actions + - Admin can perform all admin actions + - Curator can perform admin actions for + groups they are curators of + - Global Curator can perform admin actions + for all groups they are a member of + """ + BASIC = "basic" ADMIN = "admin" + CURATOR = "curator" + GLOBAL_CURATOR = "global_curator" class UserStatus(str, Enum): diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 479cf07df3d..c3851ff1990 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -1,11 +1,15 @@ import smtplib import uuid from collections.abc import AsyncGenerator +from datetime import datetime +from datetime import timezone from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from typing import Optional from typing import Tuple +from email_validator import EmailNotValidError +from email_validator import validate_email from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException @@ -38,6 +42,7 @@ from danswer.configs.app_configs import SMTP_PORT from danswer.configs.app_configs import SMTP_SERVER from danswer.configs.app_configs import SMTP_USER +from danswer.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import VALID_EMAIL_DOMAINS from danswer.configs.app_configs import WEB_DOMAIN @@ -50,26 +55,50 @@ from danswer.db.auth import get_user_count from danswer.db.auth import get_user_db from danswer.db.engine import get_session +from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import AccessToken from danswer.db.models import User +from danswer.db.users import get_user_by_email from danswer.utils.logger import setup_logger from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType -from danswer.utils.variable_functionality import ( - fetch_versioned_implementation, -) - +from 
danswer.utils.variable_functionality import fetch_versioned_implementation logger = setup_logger() +def validate_curator_request(groups: list | None, is_public: bool) -> None: + if is_public: + detail = "Curators cannot create public objects" + logger.error(detail) + raise HTTPException( + status_code=401, + detail=detail, + ) + if not groups: + detail = "Curators must specify 1+ groups" + logger.error(detail) + raise HTTPException( + status_code=401, + detail=detail, + ) + + +def is_user_admin(user: User | None) -> bool: + if AUTH_TYPE == AuthType.DISABLED: + return True + if user and user.role == UserRole.ADMIN: + return True + return False + + def verify_auth_setting() -> None: if AUTH_TYPE not in [AuthType.DISABLED, AuthType.BASIC, AuthType.GOOGLE_OAUTH]: raise ValueError( "User must choose a valid user authentication method: " "disabled, basic, or google_oauth" ) - logger.info(f"Using Auth Type: {AUTH_TYPE.value}") + logger.notice(f"Using Auth Type: {AUTH_TYPE.value}") def get_display_email(email: str | None, space_less: bool = False) -> str: @@ -92,10 +121,36 @@ def user_needs_to_be_verified() -> bool: return AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION -def verify_email_in_whitelist(email: str) -> None: +def verify_email_is_invited(email: str) -> None: whitelist = get_invited_users() - if (whitelist and email not in whitelist) or not email: - raise PermissionError("User not on allowed user whitelist") + if not whitelist: + return + + if not email: + raise PermissionError("Email must be specified") + + email_info = validate_email(email) # can raise EmailNotValidError + + for email_whitelist in whitelist: + try: + # normalized emails are now being inserted into the db + # we can remove this normalization on read after some time has passed + email_info_whitelist = validate_email(email_whitelist) + except EmailNotValidError: + continue + + # oddly, normalization does not include lowercasing the user part of the + # email address ... 
which we want to allow + if email_info.normalized.lower() == email_info_whitelist.normalized.lower(): + return + + raise PermissionError("User not on allowed user whitelist") + + +def verify_email_in_whitelist(email: str) -> None: + with Session(get_sqlalchemy_engine()) as db_session: + if not get_user_by_email(email, db_session): + verify_email_is_invited(email) def verify_email_domain(email: str) -> None: @@ -147,7 +202,7 @@ async def create( safe: bool = False, request: Optional[Request] = None, ) -> models.UP: - verify_email_in_whitelist(user_create.email) + verify_email_is_invited(user_create.email) verify_email_domain(user_create.email) if hasattr(user_create, "role"): user_count = await get_user_count() @@ -172,7 +227,7 @@ async def oauth_callback( ) -> models.UOAP: verify_email_domain(account_email) - return await super().oauth_callback( # type: ignore + user = await super().oauth_callback( # type: ignore oauth_name=oauth_name, access_token=access_token, account_id=account_id, @@ -184,10 +239,23 @@ async def oauth_callback( is_verified_by_default=is_verified_by_default, ) + # NOTE: Most IdPs have very short expiry times, and we don't want to force the user to + # re-authenticate that frequently, so by default this is disabled + if expires_at and TRACK_EXTERNAL_IDP_EXPIRY: + oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc) + await self.user_db.update(user, update_dict={"oidc_expiry": oidc_expiry}) + + # this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false` + # otherwise, the oidc expiry will always be old, and the user will never be able to login + if user.oidc_expiry and not TRACK_EXTERNAL_IDP_EXPIRY: + await self.user_db.update(user, update_dict={"oidc_expiry": None}) + + return user + async def on_after_register( self, user: User, request: Optional[Request] = None ) -> None: - logger.info(f"User {user.id} has registered.") + logger.notice(f"User {user.id} has registered.") optional_telemetry( record_type=RecordType.SIGN_UP, data={"action": "create"}, @@ -197,14 +265,14 @@ async def on_after_register( async def on_after_forgot_password( self, user: User, token: str, request: Optional[Request] = None ) -> None: - logger.info(f"User {user.id} has forgot their password. Reset token: {token}") + logger.notice(f"User {user.id} has forgot their password. Reset token: {token}") async def on_after_request_verify( self, user: User, token: str, request: Optional[Request] = None ) -> None: verify_email_domain(user.email) - logger.info( + logger.notice( f"Verification requested for user {user.id}. Verification token: {token}" ) @@ -226,10 +294,12 @@ async def get_user_manager( def get_database_strategy( access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db), ) -> DatabaseStrategy: - return DatabaseStrategy( + strategy = DatabaseStrategy( access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS # type: ignore ) + return strategy + auth_backend = AuthenticationBackend( name="database", @@ -326,6 +396,12 @@ async def double_check_user( detail="Access denied. User is not verified.", ) + if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied. 
User's OIDC token has expired.", + ) + return user @@ -335,6 +411,28 @@ async def current_user( return await double_check_user(user) +async def current_curator_or_admin_user( + user: User | None = Depends(current_user), +) -> User | None: + if DISABLE_AUTH: + return None + + if not user or not hasattr(user, "role"): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied. User is not authenticated or lacks role information.", + ) + + allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN} + if user.role not in allowed_roles: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied. User is not a curator or admin.", + ) + + return user + + async def current_admin_user(user: User | None = Depends(current_user)) -> User | None: if DISABLE_AUTH: return None @@ -342,6 +440,12 @@ async def current_admin_user(user: User | None = Depends(current_user)) -> User if not user or not hasattr(user, "role") or user.role != UserRole.ADMIN: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail="Access denied. User is not an admin.", + detail="Access denied. User must be an admin to perform this action.", ) + return user + + +def get_default_admin_user_emails_() -> list[str]: + # No default seeding available for Danswer MIT + return [] diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py index 1a678ea11fa..1c0e949d068 100644 --- a/backend/danswer/background/celery/celery_app.py +++ b/backend/danswer/background/celery/celery_app.py @@ -1,10 +1,16 @@ +import json from datetime import timedelta +from typing import Any from typing import cast from celery import Celery # type: ignore +from celery.contrib.abortable import AbortableTask # type: ignore +from celery.exceptions import TaskRevokedError +from sqlalchemy import text from sqlalchemy.orm import Session from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector +from danswer.background.celery.celery_utils import should_kick_off_deletion_of_cc_pair from danswer.background.celery.celery_utils import should_prune_cc_pair from danswer.background.celery.celery_utils import should_sync_doc_set from danswer.background.connector_deletion import delete_connector_credential_pair @@ -14,6 +20,8 @@ from danswer.background.task_utils import name_cc_prune_task from danswer.background.task_utils import name_document_set_sync_task from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import POSTGRES_CELERY_APP_NAME +from danswer.configs.constants import PostgresAdvisoryLocks from danswer.connectors.factory import instantiate_connector from danswer.connectors.models import InputType from danswer.db.connector_credential_pair import get_connector_credential_pair @@ -38,7 +46,9 @@ logger = setup_logger() -connection_string = build_connection_string(db_api=SYNC_DB_API) +connection_string = build_connection_string( + db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME +) celery_broker_url = f"sqla+{connection_string}" celery_backend_url = f"db+{connection_string}" celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url) @@ -100,7 +110,7 @@ def cleanup_connector_credential_pair_task( @build_celery_task_wrapper(name_cc_prune_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) def prune_documents_task(connector_id: int, credential_id: int) -> None: - """connector pruning task. 
For a cc pair, this task pulls all docuement IDs from the source + """connector pruning task. For a cc pair, this task pulls all document IDs from the source and compares those IDs to locally stored documents and deletes all locally stored IDs missing from the most recently pulled document ID list""" with Session(get_sqlalchemy_engine()) as db_session: @@ -278,6 +288,141 @@ def check_for_document_sets_sync_task() -> None: ) +@celery_app.task( + name="check_for_cc_pair_deletion_task", + soft_time_limit=JOB_TIMEOUT, +) +def check_for_cc_pair_deletion_task() -> None: + """Runs periodically to check if any deletion tasks should be run""" + with Session(get_sqlalchemy_engine()) as db_session: + # check if any document sets are not synced + cc_pairs = get_connector_credential_pairs(db_session) + for cc_pair in cc_pairs: + if should_kick_off_deletion_of_cc_pair(cc_pair, db_session): + logger.notice(f"Deleting the {cc_pair.name} connector credential pair") + cleanup_connector_credential_pair_task.apply_async( + kwargs=dict( + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + ), + ) + + +@celery_app.task( + name="kombu_message_cleanup_task", + soft_time_limit=JOB_TIMEOUT, + bind=True, + base=AbortableTask, +) +def kombu_message_cleanup_task(self: Any) -> int: + """Runs periodically to clean up the kombu_message table""" + + # we will select messages older than this amount to clean up + KOMBU_MESSAGE_CLEANUP_AGE = 7 # days + KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000 + + ctx = {} + ctx["last_processed_id"] = 0 + ctx["deleted"] = 0 + ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE + ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT + with Session(get_sqlalchemy_engine()) as db_session: + # Exit the task if we can't take the advisory lock + result = db_session.execute( + text("SELECT pg_try_advisory_lock(:id)"), + {"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value}, + ).scalar() + if not result: + return 0 + + while True: + if self.is_aborted(): + raise TaskRevokedError("kombu_message_cleanup_task was aborted.") + + b = kombu_message_cleanup_task_helper(ctx, db_session) + if not b: + break + + db_session.commit() + + if ctx["deleted"] > 0: + logger.info(f"Deleted {ctx['deleted']} orphaned messages from kombu_message.") + + return ctx["deleted"] + + +def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool: + """ + Helper function to clean up old messages from the `kombu_message` table that are no longer relevant. + + This function retrieves messages from the `kombu_message` table that are no longer visible and + older than a specified interval. It checks if the corresponding task_id exists in the + `celery_taskmeta` table. If the task_id does not exist, the message is deleted. + + Args: + ctx (dict): A context dictionary containing configuration parameters such as: + - 'cleanup_age' (int): The age in days after which messages are considered old. + - 'page_limit' (int): The maximum number of messages to process in one batch. + - 'last_processed_id' (int): The ID of the last processed message to handle pagination. + - 'deleted' (int): A counter to track the number of deleted messages. + db_session (Session): The SQLAlchemy database session for executing queries. + + Returns: + bool: Returns True if there are more rows to process, False if not. 
+ """ + + query = text( + """ + SELECT id, timestamp, payload + FROM kombu_message WHERE visible = 'false' + AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days + AND id > :last_processed_id + ORDER BY id + LIMIT :page_limit +""" + ) + kombu_messages = db_session.execute( + query, + { + "interval_days": f"{ctx['cleanup_age']} days", + "page_limit": ctx["page_limit"], + "last_processed_id": ctx["last_processed_id"], + }, + ).fetchall() + + if len(kombu_messages) == 0: + return False + + for msg in kombu_messages: + payload = json.loads(msg[2]) + task_id = payload["headers"]["id"] + + # Check if task_id exists in celery_taskmeta + task_exists = db_session.execute( + text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"), + {"task_id": task_id}, + ).fetchone() + + # If task_id does not exist, delete the message + if not task_exists: + result = db_session.execute( + text("DELETE FROM kombu_message WHERE id = :message_id"), + {"message_id": msg[0]}, + ) + if result.rowcount > 0: # type: ignore + ctx["deleted"] += 1 + else: + task_name = payload["headers"]["task"] + logger.warning( + f"Message found for task older than {ctx['cleanup_age']} days. " + f"id={task_id} name={task_name}" + ) + + ctx["last_processed_id"] = msg[0] + + return True + + @celery_app.task( name="check_for_prune_task", soft_time_limit=JOB_TIMEOUT, @@ -313,6 +458,12 @@ def check_for_prune_task() -> None: "task": "check_for_document_sets_sync_task", "schedule": timedelta(seconds=5), }, + "check-for-cc-pair-deletion": { + "task": "check_for_cc_pair_deletion_task", + # don't need to check too often, since we kick off a deletion initially + # during the API call that actually marks the CC pair for deletion + "schedule": timedelta(minutes=1), + }, } celery_app.conf.beat_schedule.update( { @@ -322,3 +473,11 @@ def check_for_prune_task() -> None: }, } ) +celery_app.conf.beat_schedule.update( + { + "kombu-message-cleanup": { + "task": "kombu_message_cleanup_task", + "schedule": timedelta(seconds=3600), + }, + } +) diff --git a/backend/danswer/background/celery/celery_utils.py b/backend/danswer/background/celery/celery_utils.py index 6b9b5a89603..e4d4d13bb1d 100644 --- a/backend/danswer/background/celery/celery_utils.py +++ b/backend/danswer/background/celery/celery_utils.py @@ -6,8 +6,8 @@ from danswer.background.task_utils import name_cc_cleanup_task from danswer.background.task_utils import name_cc_prune_task from danswer.background.task_utils import name_document_set_sync_task +from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE -from danswer.configs.app_configs import PREVENT_SIMULTANEOUS_PRUNING from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) @@ -16,10 +16,14 @@ from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.models import Document +from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.engine import get_db_current_time +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import Connector +from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential from danswer.db.models import DocumentSet +from danswer.db.models import TaskQueueState from danswer.db.tasks import check_task_is_live_and_not_timed_out from danswer.db.tasks import get_latest_task from danswer.db.tasks import 
get_latest_task_by_type @@ -29,24 +33,54 @@ logger = setup_logger() -def get_deletion_status( +def _get_deletion_status( connector_id: int, credential_id: int, db_session: Session -) -> DeletionAttemptSnapshot | None: +) -> TaskQueueState | None: cleanup_task_name = name_cc_cleanup_task( connector_id=connector_id, credential_id=credential_id ) - task_state = get_latest_task(task_name=cleanup_task_name, db_session=db_session) + return get_latest_task(task_name=cleanup_task_name, db_session=db_session) + - if not task_state: +def get_deletion_attempt_snapshot( + connector_id: int, credential_id: int, db_session: Session +) -> DeletionAttemptSnapshot | None: + deletion_task = _get_deletion_status(connector_id, credential_id, db_session) + if not deletion_task: return None return DeletionAttemptSnapshot( connector_id=connector_id, credential_id=credential_id, - status=task_state.status, + status=deletion_task.status, ) +def should_kick_off_deletion_of_cc_pair( + cc_pair: ConnectorCredentialPair, db_session: Session +) -> bool: + if cc_pair.status != ConnectorCredentialPairStatus.DELETING: + return False + + if check_deletion_attempt_is_allowed(cc_pair, db_session): + return False + + deletion_task = _get_deletion_status( + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + db_session=db_session, + ) + if deletion_task and check_task_is_live_and_not_timed_out( + deletion_task, + db_session, + # 1 hour timeout + timeout=60 * 60, + ): + return False + + return True + + def should_sync_doc_set(document_set: DocumentSet, db_session: Session) -> bool: if document_set.is_up_to_date: return False @@ -58,7 +92,7 @@ def should_sync_doc_set(document_set: DocumentSet, db_session: Session) -> bool: logger.info(f"Document set '{document_set.id}' is already syncing. Skipping.") return False - logger.info(f"Document set {document_set.id} syncing now!") + logger.info(f"Document set {document_set.id} syncing now.") return True @@ -80,7 +114,7 @@ def should_prune_cc_pair( return True return False - if PREVENT_SIMULTANEOUS_PRUNING: + if not ALLOW_SIMULTANEOUS_PRUNING: pruning_type_task_name = name_cc_prune_task() last_pruning_type_task = get_latest_task_by_type( pruning_type_task_name, db_session @@ -89,11 +123,9 @@ def should_prune_cc_pair( if last_pruning_type_task and check_task_is_live_and_not_timed_out( last_pruning_type_task, db_session ): - logger.info("Another Connector is already pruning. Skipping.") return False if check_task_is_live_and_not_timed_out(last_pruning_task, db_session): - logger.info(f"Connector '{connector.name}' is already pruning. 
Skipping.") return False if not last_pruning_task.start_time: diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py index 28c58f02dfc..90883564910 100644 --- a/backend/danswer/background/connector_deletion.py +++ b/backend/danswer/background/connector_deletion.py @@ -10,8 +10,6 @@ connector / credential pair from the access list (6) delete all relevant entries from postgres """ -import time - from sqlalchemy.orm import Session from danswer.access.access import get_access_for_documents @@ -24,10 +22,8 @@ from danswer.db.document import get_document_connector_cnts from danswer.db.document import get_documents_for_connector_credential_pair from danswer.db.document import prepare_to_modify_documents -from danswer.db.document_set import get_document_sets_by_ids -from danswer.db.document_set import ( - mark_cc_pair__document_set_relationships_to_be_deleted__no_commit, -) +from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit +from danswer.db.document_set import fetch_document_sets_for_documents from danswer.db.engine import get_sqlalchemy_engine from danswer.db.index_attempt import delete_index_attempts from danswer.db.models import ConnectorCredentialPair @@ -35,6 +31,10 @@ from danswer.document_index.interfaces import UpdateRequest from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import ( + fetch_versioned_implementation_with_fallback, +) +from danswer.utils.variable_functionality import noop_fallback logger = setup_logger() @@ -78,25 +78,37 @@ def delete_connector_credential_pair_batch( document_ids_to_update = [ document_id for document_id, cnt in document_connector_cnts if cnt > 1 ] + + # maps document id to list of document set names + new_doc_sets_for_documents: dict[str, set[str]] = { + document_id_and_document_set_names_tuple[0]: set( + document_id_and_document_set_names_tuple[1] + ) + for document_id_and_document_set_names_tuple in fetch_document_sets_for_documents( + db_session=db_session, + document_ids=document_ids_to_update, + ) + } + + # determine future ACLs for documents in batch access_for_documents = get_access_for_documents( document_ids=document_ids_to_update, db_session=db_session, - cc_pair_to_delete=ConnectorCredentialPairIdentifier( - connector_id=connector_id, - credential_id=credential_id, - ), ) + + # update Vespa + logger.debug(f"Updating documents: {document_ids_to_update}") update_requests = [ UpdateRequest( document_ids=[document_id], access=access, + document_sets=new_doc_sets_for_documents[document_id], ) for document_id, access in access_for_documents.items() ] - logger.debug(f"Updating documents: {document_ids_to_update}") - document_index.update(update_requests=update_requests) + # clean up Postgres delete_document_by_connector_credential_pair__no_commit( db_session=db_session, document_ids=document_ids_to_update, @@ -108,48 +120,6 @@ def delete_connector_credential_pair_batch( db_session.commit() -def cleanup_synced_entities( - cc_pair: ConnectorCredentialPair, db_session: Session -) -> None: - """Updates the document sets associated with the connector / credential pair, - then relies on the document set sync script to kick off Celery jobs which will - sync these updates to Vespa. 
- - Waits until the document sets are synced before returning.""" - logger.info(f"Cleaning up Document Sets for CC Pair with ID: '{cc_pair.id}'") - document_sets_ids_to_sync = list( - mark_cc_pair__document_set_relationships_to_be_deleted__no_commit( - cc_pair_id=cc_pair.id, - db_session=db_session, - ) - ) - db_session.commit() - - # wait till all document sets are synced before continuing - while True: - all_synced = True - document_sets = get_document_sets_by_ids( - db_session=db_session, document_set_ids=document_sets_ids_to_sync - ) - for document_set in document_sets: - if not document_set.is_up_to_date: - all_synced = False - - if all_synced: - break - - # wait for 30 seconds before checking again - db_session.commit() # end transaction - logger.info( - f"Document sets '{document_sets_ids_to_sync}' not synced yet, waiting 30s" - ) - time.sleep(30) - - logger.info( - f"Finished cleaning up Document Sets for CC Pair with ID: '{cc_pair.id}'" - ) - - def delete_connector_credential_pair( db_session: Session, document_index: DocumentIndex, @@ -177,17 +147,33 @@ def delete_connector_credential_pair( ) num_docs_deleted += len(documents) - # Clean up document sets / access information from Postgres - # and sync these updates to Vespa - # TODO: add user group cleanup with `fetch_versioned_implementation` - cleanup_synced_entities(cc_pair, db_session) - # clean up the rest of the related Postgres entities + # index attempts delete_index_attempts( db_session=db_session, connector_id=connector_id, credential_id=credential_id, ) + + # document sets + delete_document_set_cc_pair_relationship__no_commit( + db_session=db_session, + connector_id=connector_id, + credential_id=credential_id, + ) + + # user groups + cleanup_user_groups = fetch_versioned_implementation_with_fallback( + "danswer.db.user_group", + "delete_user_group_cc_pair_relationship__no_commit", + noop_fallback, + ) + cleanup_user_groups( + cc_pair_id=cc_pair.id, + db_session=db_session, + ) + + # finally, delete the cc-pair delete_connector_credential_pair__no_commit( db_session=db_session, connector_id=connector_id, @@ -199,11 +185,11 @@ def delete_connector_credential_pair( connector_id=connector_id, ) if not connector or not len(connector.credentials): - logger.debug("Found no credentials left for connector, deleting connector") + logger.info("Found no credentials left for connector, deleting connector") db_session.delete(connector) db_session.commit() - logger.info( + logger.notice( "Successfully deleted connector_credential_pair with connector_id:" f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs." 
) diff --git a/backend/danswer/background/indexing/job_client.py b/backend/danswer/background/indexing/job_client.py index 72919d690a4..68d706895fd 100644 --- a/backend/danswer/background/indexing/job_client.py +++ b/backend/danswer/background/indexing/job_client.py @@ -41,6 +41,12 @@ def _initializer( return func(*args, **kwargs) +def _run_in_process( + func: Callable, args: list | tuple, kwargs: dict[str, Any] | None = None +) -> None: + _initializer(func, args, kwargs) + + @dataclass class SimpleJob: """Drop in replacement for `dask.distributed.Future`""" @@ -113,7 +119,7 @@ def submit(self, func: Callable, *args: Any, pure: bool = True) -> SimpleJob | N job_id = self.job_id_counter self.job_id_counter += 1 - process = Process(target=_initializer(func=func, args=args), daemon=True) + process = Process(target=_run_in_process, args=(func, args), daemon=True) job = SimpleJob(id=job_id, process=process) process.start() diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py index fa684f020b6..a98f4e1f5ad 100644 --- a/backend/danswer/background/indexing/run_indexing.py +++ b/backend/danswer/background/indexing/run_indexing.py @@ -7,20 +7,21 @@ from sqlalchemy.orm import Session from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt +from danswer.background.indexing.tracer import DanswerTracer +from danswer.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD +from danswer.configs.app_configs import INDEXING_TRACER_INTERVAL from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET +from danswer.connectors.connector_runner import ConnectorRunner from danswer.connectors.factory import instantiate_connector -from danswer.connectors.interfaces import GenerateDocumentsOutput -from danswer.connectors.interfaces import LoadConnector -from danswer.connectors.interfaces import PollConnector from danswer.connectors.models import IndexAttemptMetadata -from danswer.connectors.models import InputType -from danswer.db.connector import disable_connector from danswer.db.connector_credential_pair import get_last_successful_attempt_time from danswer.db.connector_credential_pair import update_connector_credential_pair from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import mark_attempt_failed -from danswer.db.index_attempt import mark_attempt_in_progress__no_commit +from danswer.db.index_attempt import mark_attempt_in_progress +from danswer.db.index_attempt import mark_attempt_partially_succeeded from danswer.db.index_attempt import mark_attempt_succeeded from danswer.db.index_attempt import update_docs_indexed from danswer.db.models import IndexAttempt @@ -35,13 +36,15 @@ logger = setup_logger() +INDEXING_TRACER_NUM_PRINT_ENTRIES = 5 -def _get_document_generator( + +def _get_connector_runner( db_session: Session, attempt: IndexAttempt, start_time: datetime, end_time: datetime, -) -> GenerateDocumentsOutput: +) -> ConnectorRunner: """ NOTE: `start_time` and `end_time` are only used for poll connectors @@ -49,43 +52,31 @@ def _get_document_generator( are the complete list of existing documents of the connector. If the task of type LOAD_STATE, the list will be considered complete and otherwise incomplete. 
""" - task = attempt.connector.input_type + task = attempt.connector_credential_pair.connector.input_type try: runnable_connector = instantiate_connector( - attempt.connector.source, + attempt.connector_credential_pair.connector.source, task, - attempt.connector.connector_specific_config, - attempt.credential, + attempt.connector_credential_pair.connector.connector_specific_config, + attempt.connector_credential_pair.credential, db_session, ) except Exception as e: logger.exception(f"Unable to instantiate connector due to {e}") - disable_connector(attempt.connector.id, db_session) - raise e - - if task == InputType.LOAD_STATE: - assert isinstance(runnable_connector, LoadConnector) - doc_batch_generator = runnable_connector.load_from_state() - - elif task == InputType.POLL: - assert isinstance(runnable_connector, PollConnector) - if attempt.connector_id is None or attempt.credential_id is None: - raise ValueError( - f"Polling attempt {attempt.id} is missing connector_id or credential_id, " - f"can't fetch time range." - ) - - logger.info(f"Polling for updates between {start_time} and {end_time}") - doc_batch_generator = runnable_connector.poll_source( - start=start_time.timestamp(), end=end_time.timestamp() + # since we failed to even instantiate the connector, we pause the CCPair since + # it will never succeed + update_connector_credential_pair( + db_session=db_session, + connector_id=attempt.connector_credential_pair.connector.id, + credential_id=attempt.connector_credential_pair.credential.id, + status=ConnectorCredentialPairStatus.PAUSED, ) + raise e - else: - # Event types cannot be handled by a background type - raise RuntimeError(f"Invalid task type: {task}") - - return doc_batch_generator + return ConnectorRunner( + connector=runnable_connector, time_range=(start_time, end_time) + ) def _run_indexing( @@ -99,46 +90,62 @@ def _run_indexing( """ start_time = time.time() - db_embedding_model = index_attempt.embedding_model - index_name = db_embedding_model.index_name + search_settings = index_attempt.search_settings + index_name = search_settings.index_name # Only update cc-pair status for primary index jobs # Secondary index syncs at the end when swapping - is_primary = index_attempt.embedding_model.status == IndexModelStatus.PRESENT + is_primary = search_settings.status == IndexModelStatus.PRESENT # Indexing is only done into one index at a time document_index = get_default_document_index( primary_index_name=index_name, secondary_index_name=None ) - embedding_model = DefaultIndexingEmbedder( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - query_prefix=db_embedding_model.query_prefix, - passage_prefix=db_embedding_model.passage_prefix, + embedding_model = DefaultIndexingEmbedder.from_db_search_settings( + search_settings=search_settings ) indexing_pipeline = build_indexing_pipeline( + attempt_id=index_attempt.id, embedder=embedding_model, document_index=document_index, ignore_time_skip=index_attempt.from_beginning - or (db_embedding_model.status == IndexModelStatus.FUTURE), + or (search_settings.status == IndexModelStatus.FUTURE), db_session=db_session, ) - db_connector = index_attempt.connector - db_credential = index_attempt.credential + db_cc_pair = index_attempt.connector_credential_pair + db_connector = index_attempt.connector_credential_pair.connector + db_credential = index_attempt.connector_credential_pair.credential + last_successful_index_time = ( - 0.0 - if index_attempt.from_beginning - else get_last_successful_attempt_time( - 
connector_id=db_connector.id, - credential_id=db_credential.id, - embedding_model=index_attempt.embedding_model, - db_session=db_session, + db_connector.indexing_start.timestamp() + if index_attempt.from_beginning and db_connector.indexing_start is not None + else ( + 0.0 + if index_attempt.from_beginning + else get_last_successful_attempt_time( + connector_id=db_connector.id, + credential_id=db_credential.id, + search_settings=index_attempt.search_settings, + db_session=db_session, + ) ) ) + if INDEXING_TRACER_INTERVAL > 0: + logger.debug(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}") + tracer = DanswerTracer() + tracer.start() + tracer.snap() + + index_attempt_md = IndexAttemptMetadata( + connector_id=db_connector.id, + credential_id=db_credential.id, + ) + + batch_num = 0 net_doc_change = 0 document_count = 0 chunk_count = 0 @@ -157,7 +164,7 @@ def _run_indexing( datetime(1970, 1, 1, tzinfo=timezone.utc), ) - doc_batch_generator = _get_document_generator( + connector_runner = _get_connector_runner( db_session=db_session, attempt=index_attempt, start_time=window_start, @@ -165,15 +172,23 @@ def _run_indexing( ) all_connector_doc_ids: set[str] = set() - for doc_batch in doc_batch_generator: + + tracer_counter = 0 + if INDEXING_TRACER_INTERVAL > 0: + tracer.snap() + for doc_batch in connector_runner.run(): # Check if connector is disabled mid run and stop if so unless it's the secondary # index being built. We want to populate it even for paused connectors # Often paused connectors are sources that aren't updated frequently but the # contents still need to be initially pulled. db_session.refresh(db_connector) if ( - db_connector.disabled - and db_embedding_model.status != IndexModelStatus.FUTURE + ( + db_cc_pair.status == ConnectorCredentialPairStatus.PAUSED + and search_settings.status != IndexModelStatus.FUTURE + ) + # if it's deleting, we don't care if this is a secondary index + or db_cc_pair.status == ConnectorCredentialPairStatus.DELETING ): # let the `except` block handle this raise RuntimeError("Connector was disabled mid run") @@ -183,17 +198,30 @@ def _run_indexing( # Likely due to user manually disabling it or model swap raise RuntimeError("Index Attempt was canceled") - logger.debug( - f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}" - ) + batch_description = [] + for doc in doc_batch: + batch_description.append(doc.to_short_descriptor()) + + doc_size = 0 + for section in doc.sections: + doc_size += len(section.text) + if doc_size > INDEXING_SIZE_WARNING_THRESHOLD: + logger.warning( + f"Document size: doc='{doc.to_short_descriptor()}' " + f"size={doc_size} " + f"threshold={INDEXING_SIZE_WARNING_THRESHOLD}" + ) + + logger.debug(f"Indexing batch of documents: {batch_description}") + + index_attempt_md.batch_num = batch_num + 1 # use 1-index for this new_docs, total_batch_chunks = indexing_pipeline( - documents=doc_batch, - index_attempt_metadata=IndexAttemptMetadata( - connector_id=db_connector.id, - credential_id=db_credential.id, - ), + document_batch=doc_batch, + index_attempt_metadata=index_attempt_md, ) + + batch_num += 1 net_doc_change += new_docs chunk_count += total_batch_chunks document_count += len(doc_batch) @@ -215,6 +243,17 @@ def _run_indexing( docs_removed_from_index=0, ) + tracer_counter += 1 + if ( + INDEXING_TRACER_INTERVAL > 0 + and tracer_counter % INDEXING_TRACER_INTERVAL == 0 + ): + logger.debug( + f"Running trace comparison for batch {tracer_counter}. 
interval={INDEXING_TRACER_INTERVAL}" + ) + tracer.snap() + tracer.log_previous_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES) + run_end_dt = window_end if is_primary: update_connector_credential_pair( @@ -225,7 +264,7 @@ def _run_indexing( run_dt=run_end_dt, ) except Exception as e: - logger.info( + logger.exception( f"Connector run ran into exception after elapsed time: {time.time() - start_time} seconds" ) # Only mark the attempt as a complete failure if this is the first indexing window. @@ -237,7 +276,7 @@ def _run_indexing( # to give better clarity in the UI, as the next run will never happen. if ( ind == 0 - or db_connector.disabled + or not db_cc_pair.status.is_active() or index_attempt.status != IndexingStatus.IN_PROGRESS ): mark_attempt_failed( @@ -249,17 +288,66 @@ def _run_indexing( if is_primary: update_connector_credential_pair( db_session=db_session, - connector_id=index_attempt.connector.id, - credential_id=index_attempt.credential.id, + connector_id=db_connector.id, + credential_id=db_credential.id, net_docs=net_doc_change, ) + + if INDEXING_TRACER_INTERVAL > 0: + tracer.stop() raise e # break => similar to success case. As mentioned above, if the next run fails for the same # reason it will then be marked as a failure break - mark_attempt_succeeded(index_attempt, db_session) + if INDEXING_TRACER_INTERVAL > 0: + logger.debug( + f"Running trace comparison between start and end of indexing. {tracer_counter} batches processed." + ) + tracer.snap() + tracer.log_first_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES) + tracer.stop() + logger.debug("Memory tracer stopped.") + + if ( + index_attempt_md.num_exceptions > 0 + and index_attempt_md.num_exceptions >= batch_num + ): + mark_attempt_failed( + index_attempt, + db_session, + failure_reason="All batches exceptioned.", + ) + if is_primary: + update_connector_credential_pair( + db_session=db_session, + connector_id=index_attempt.connector_credential_pair.connector.id, + credential_id=index_attempt.connector_credential_pair.credential.id, + ) + raise Exception( + f"Connector failed - All batches exceptioned: batches={batch_num}" + ) + + elapsed_time = time.time() - start_time + + if index_attempt_md.num_exceptions == 0: + mark_attempt_succeeded(index_attempt, db_session) + logger.info( + f"Connector succeeded: " + f"docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s" + ) + else: + mark_attempt_partially_succeeded(index_attempt, db_session) + logger.info( + f"Connector completed with some errors: " + f"exceptions={index_attempt_md.num_exceptions} " + f"batches={batch_num} " + f"docs={document_count} " + f"chunks={chunk_count} " + f"elapsed={elapsed_time:.2f}s" + ) + if is_primary: update_connector_credential_pair( db_session=db_session, @@ -268,13 +356,6 @@ def _run_indexing( run_dt=run_end_dt, ) - logger.info( - f"Indexed or refreshed {document_count} total documents for a total of {chunk_count} indexed chunks" - ) - logger.info( - f"Connector successfully finished, elapsed time: {time.time() - start_time} seconds" - ) - def _prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexAttempt: # make sure that the index attempt can't change in between checking the @@ -287,6 +368,7 @@ def _prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexA db_session=db_session, index_attempt_id=index_attempt_id, ) + if attempt is None: raise RuntimeError(f"Unable to find IndexAttempt for ID '{index_attempt_id}'") @@ -297,9 +379,7 @@ def _prepare_index_attempt(db_session: Session, index_attempt_id: int) 
-> IndexA ) # only commit once, to make sure this all happens in a single transaction - mark_attempt_in_progress__no_commit(attempt) - if attempt.embedding_model.status != IndexModelStatus.PRESENT: - db_session.commit() + mark_attempt_in_progress(attempt, db_session) return attempt @@ -322,17 +402,19 @@ def run_indexing_entrypoint(index_attempt_id: int, is_ee: bool = False) -> None: attempt = _prepare_index_attempt(db_session, index_attempt_id) logger.info( - f"Running indexing attempt for connector: '{attempt.connector.name}', " - f"with config: '{attempt.connector.connector_specific_config}', and " - f"with credentials: '{attempt.credential_id}'" + f"Indexing starting: " + f"connector='{attempt.connector_credential_pair.connector.name}' " + f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " + f"credentials='{attempt.connector_credential_pair.connector_id}'" ) _run_indexing(db_session, attempt) logger.info( - f"Completed indexing attempt for connector: '{attempt.connector.name}', " - f"with config: '{attempt.connector.connector_specific_config}', and " - f"with credentials: '{attempt.credential_id}'" + f"Indexing finished: " + f"connector='{attempt.connector_credential_pair.connector.name}' " + f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " + f"credentials='{attempt.connector_credential_pair.connector_id}'" ) except Exception as e: logger.exception(f"Indexing job with ID '{index_attempt_id}' failed due to {e}") diff --git a/backend/danswer/background/indexing/tracer.py b/backend/danswer/background/indexing/tracer.py new file mode 100644 index 00000000000..baad9623087 --- /dev/null +++ b/backend/danswer/background/indexing/tracer.py @@ -0,0 +1,77 @@ +import tracemalloc + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +DANSWER_TRACEMALLOC_FRAMES = 10 + + +class DanswerTracer: + def __init__(self) -> None: + self.snapshot_first: tracemalloc.Snapshot | None = None + self.snapshot_prev: tracemalloc.Snapshot | None = None + self.snapshot: tracemalloc.Snapshot | None = None + + def start(self) -> None: + tracemalloc.start(DANSWER_TRACEMALLOC_FRAMES) + + def stop(self) -> None: + tracemalloc.stop() + + def snap(self) -> None: + snapshot = tracemalloc.take_snapshot() + # Filter out irrelevant frames (e.g., from tracemalloc itself or importlib) + snapshot = snapshot.filter_traces( + ( + tracemalloc.Filter(False, tracemalloc.__file__), # Exclude tracemalloc + tracemalloc.Filter( + False, "" + ), # Exclude importlib + tracemalloc.Filter( + False, "" + ), # Exclude external importlib + ) + ) + + if not self.snapshot_first: + self.snapshot_first = snapshot + + if self.snapshot: + self.snapshot_prev = self.snapshot + + self.snapshot = snapshot + + def log_snapshot(self, numEntries: int) -> None: + if not self.snapshot: + return + + stats = self.snapshot.statistics("traceback") + for s in stats[:numEntries]: + logger.debug(f"Tracer snap: {s}") + for line in s.traceback: + logger.debug(f"* {line}") + + @staticmethod + def log_diff( + snap_current: tracemalloc.Snapshot, + snap_previous: tracemalloc.Snapshot, + numEntries: int, + ) -> None: + stats = snap_current.compare_to(snap_previous, "traceback") + for s in stats[:numEntries]: + logger.debug(f"Tracer diff: {s}") + for line in s.traceback.format(): + logger.debug(f"* {line}") + + def log_previous_diff(self, numEntries: int) -> None: + if not self.snapshot or not self.snapshot_prev: + return + + DanswerTracer.log_diff(self.snapshot, self.snapshot_prev, 
numEntries) + + def log_first_diff(self, numEntries: int) -> None: + if not self.snapshot or not self.snapshot_first: + return + + DanswerTracer.log_diff(self.snapshot, self.snapshot_first, numEntries) diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 9ca65f8b33a..28abb481143 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -16,24 +16,29 @@ from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP from danswer.configs.app_configs import NUM_INDEXING_WORKERS +from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS +from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME from danswer.db.connector import fetch_connectors -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model +from danswer.db.connector_credential_pair import fetch_connector_credential_pairs from danswer.db.engine import get_db_current_time from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import init_sqlalchemy_engine from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import get_inprogress_index_attempts -from danswer.db.index_attempt import get_last_attempt +from danswer.db.index_attempt import get_last_attempt_for_cc_pair from danswer.db.index_attempt import get_not_started_index_attempts from danswer.db.index_attempt import mark_attempt_failed -from danswer.db.models import Connector -from danswer.db.models import EmbeddingModel +from danswer.db.models import ConnectorCredentialPair from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus from danswer.db.models import IndexModelStatus +from danswer.db.models import SearchSettings +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings from danswer.db.swap_index import check_index_swap -from danswer.search.search_nlp_models import warm_up_encoders +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import global_version from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable @@ -41,6 +46,7 @@ from shared_configs.configs import LOG_LEVEL from shared_configs.configs import MODEL_SERVER_PORT + logger = setup_logger() # If the indexing dies, it's most likely due to resource constraints, @@ -53,41 +59,64 @@ def _should_create_new_indexing( - connector: Connector, + cc_pair: ConnectorCredentialPair, last_index: IndexAttempt | None, - model: EmbeddingModel, + search_settings_instance: SearchSettings, secondary_index_building: bool, db_session: Session, ) -> bool: + connector = cc_pair.connector + # User can still manually create single indexing attempts via the UI for the # currently in use index if DISABLE_INDEX_UPDATE_ON_SWAP: - if model.status == IndexModelStatus.PRESENT and secondary_index_building: + if ( + search_settings_instance.status == IndexModelStatus.PRESENT + and secondary_index_building + ): return False # When switching over models, always index at least once - if model.status == IndexModelStatus.FUTURE and not last_index: - if connector.id 
== 0: # Ingestion API - return False + if search_settings_instance.status == IndexModelStatus.FUTURE: + if last_index: + # No new index if the last index attempt succeeded + # Once is enough. The model will never be able to swap otherwise. + if last_index.status == IndexingStatus.SUCCESS: + return False + + # No new index if the last index attempt is waiting to start + if last_index.status == IndexingStatus.NOT_STARTED: + return False + + # No new index if the last index attempt is running + if last_index.status == IndexingStatus.IN_PROGRESS: + return False + else: + if connector.id == 0: # Ingestion API + return False return True - # If the connector is disabled, don't index - # NOTE: during an embedding model switch over, we ignore this - # and index the disabled connectors as well (which is why this if - # statement is below the first condition above) - if connector.disabled: + # If the connector is paused or is the ingestion API, don't index + # NOTE: during an embedding model switch over, the following logic + # is bypassed by the above check for a future model + if not cc_pair.status.is_active() or connector.id == 0: return False - if connector.refresh_freq is None: - return False if not last_index: return True - # Only one scheduled job per connector at a time - # Can schedule another one if the current one is already running however - # Because the currently running one will not be until the latest time - # Note, this last index is for the given embedding model - if last_index.status == IndexingStatus.NOT_STARTED: + if connector.refresh_freq is None: + return False + + # Only one scheduled/ongoing job per connector at a time + # this prevents cases where + # (1) the "latest" index_attempt is scheduled so we show + # that in the UI despite another index_attempt being in-progress + # (2) multiple scheduled index_attempts at a time + if ( + last_index.status == IndexingStatus.NOT_STARTED + or last_index.status == IndexingStatus.IN_PROGRESS + ): return False current_db_time = get_db_current_time(db_session) @@ -95,24 +124,14 @@ def _should_create_new_indexing( return time_since_index.total_seconds() >= connector.refresh_freq -def _is_indexing_job_marked_as_finished(index_attempt: IndexAttempt | None) -> bool: - if index_attempt is None: - return False - - return ( - index_attempt.status == IndexingStatus.FAILED - or index_attempt.status == IndexingStatus.SUCCESS - ) - - def _mark_run_failed( db_session: Session, index_attempt: IndexAttempt, failure_reason: str ) -> None: """Marks the `index_attempt` row as failed + updates the ` connector_credential_pair` to reflect that the run failed""" logger.warning( - f"Marking in-progress attempt 'connector: {index_attempt.connector_id}, " - f"credential: {index_attempt.credential_id}' as failed due to {failure_reason}" + f"Marking in-progress attempt 'connector: {index_attempt.connector_credential_pair.connector_id}, " + f"credential: {index_attempt.connector_credential_pair.credential_id}' as failed due to {failure_reason}" ) mark_attempt_failed( index_attempt=index_attempt, @@ -131,7 +150,7 @@ def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None: 3. 
There is not already an ongoing indexing attempt for this pair """ with Session(get_sqlalchemy_engine()) as db_session: - ongoing: set[tuple[int | None, int | None, int]] = set() + ongoing: set[tuple[int | None, int]] = set() for attempt_id in existing_jobs: attempt = get_index_attempt( db_session=db_session, index_attempt_id=attempt_id @@ -144,42 +163,43 @@ def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None: continue ongoing.add( ( - attempt.connector_id, - attempt.credential_id, - attempt.embedding_model_id, + attempt.connector_credential_pair_id, + attempt.search_settings_id, ) ) - embedding_models = [get_current_db_embedding_model(db_session)] - secondary_embedding_model = get_secondary_db_embedding_model(db_session) - if secondary_embedding_model is not None: - embedding_models.append(secondary_embedding_model) - - all_connectors = fetch_connectors(db_session) - for connector in all_connectors: - for association in connector.credentials: - for model in embedding_models: - credential = association.credential - - # Check if there is an ongoing indexing attempt for this connector + credential pair - if (connector.id, credential.id, model.id) in ongoing: - continue - - last_attempt = get_last_attempt( - connector.id, credential.id, model.id, db_session - ) - if not _should_create_new_indexing( - connector=connector, - last_index=last_attempt, - model=model, - secondary_index_building=len(embedding_models) > 1, - db_session=db_session, - ): - continue + # Get the primary search settings + primary_search_settings = get_current_search_settings(db_session) + search_settings = [primary_search_settings] + + # Check for secondary search settings + secondary_search_settings = get_secondary_search_settings(db_session) + if secondary_search_settings is not None: + # If secondary settings exist, add them to the list + search_settings.append(secondary_search_settings) + + all_connector_credential_pairs = fetch_connector_credential_pairs(db_session) + for cc_pair in all_connector_credential_pairs: + for search_settings_instance in search_settings: + # Check if there is an ongoing indexing attempt for this connector credential pair + if (cc_pair.id, search_settings_instance.id) in ongoing: + continue + + last_attempt = get_last_attempt_for_cc_pair( + cc_pair.id, search_settings_instance.id, db_session + ) + if not _should_create_new_indexing( + cc_pair=cc_pair, + last_index=last_attempt, + search_settings_instance=search_settings_instance, + secondary_index_building=len(search_settings) > 1, + db_session=db_session, + ): + continue - create_index_attempt( - connector.id, credential.id, model.id, db_session - ) + create_index_attempt( + cc_pair.id, search_settings_instance.id, db_session + ) def cleanup_indexing_jobs( @@ -196,10 +216,12 @@ def cleanup_indexing_jobs( ) # do nothing for ongoing jobs that haven't been stopped - if not job.done() and not _is_indexing_job_marked_as_finished( - index_attempt - ): - continue + if not job.done(): + if not index_attempt: + continue + + if not index_attempt.is_finished(): + continue if job.status == "error": logger.error(job.exception()) @@ -271,24 +293,28 @@ def kickoff_indexing_jobs( # Don't include jobs waiting in the Dask queue that just haven't started running # Also (rarely) don't include for jobs that started but haven't updated the indexing tables yet with Session(engine) as db_session: + # get_not_started_index_attempts orders its returned results from oldest to newest + # we must process attempts in a FIFO manner to prevent 
connector starvation new_indexing_attempts = [ - (attempt, attempt.embedding_model) + (attempt, attempt.search_settings) for attempt in get_not_started_index_attempts(db_session) if attempt.id not in existing_jobs ] - logger.info(f"Found {len(new_indexing_attempts)} new indexing tasks.") + logger.debug(f"Found {len(new_indexing_attempts)} new indexing task(s).") if not new_indexing_attempts: return existing_jobs - for attempt, embedding_model in new_indexing_attempts: + indexing_attempt_count = 0 + + for attempt, search_settings in new_indexing_attempts: use_secondary_index = ( - embedding_model.status == IndexModelStatus.FUTURE - if embedding_model is not None + search_settings.status == IndexModelStatus.FUTURE + if search_settings is not None else False ) - if attempt.connector is None: + if attempt.connector_credential_pair.connector is None: logger.warning( f"Skipping index attempt as Connector has been deleted: {attempt}" ) @@ -297,7 +323,7 @@ def kickoff_indexing_jobs( attempt, db_session, failure_reason="Connector is null" ) continue - if attempt.credential is None: + if attempt.connector_credential_pair.credential is None: logger.warning( f"Skipping index attempt as Credential has been deleted: {attempt}" ) @@ -323,33 +349,57 @@ def kickoff_indexing_jobs( ) if run: - secondary_str = "(secondary index) " if use_secondary_index else "" + if indexing_attempt_count == 0: + logger.info( + f"Indexing dispatch starts: pending={len(new_indexing_attempts)}" + ) + + indexing_attempt_count += 1 + secondary_str = " (secondary index)" if use_secondary_index else "" logger.info( - f"Kicked off {secondary_str}" - f"indexing attempt for connector: '{attempt.connector.name}', " - f"with config: '{attempt.connector.connector_specific_config}', and " - f"with credentials: '{attempt.credential_id}'" + f"Indexing dispatched{secondary_str}: " + f"attempt_id={attempt.id} " + f"connector='{attempt.connector_credential_pair.connector.name}' " + f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " + f"credentials='{attempt.connector_credential_pair.credential_id}'" ) existing_jobs_copy[attempt.id] = run + if indexing_attempt_count > 0: + logger.info( + f"Indexing dispatch results: " + f"initial_pending={len(new_indexing_attempts)} " + f"started={indexing_attempt_count} " + f"remaining={len(new_indexing_attempts) - indexing_attempt_count}" + ) + return existing_jobs_copy -def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> None: +def update_loop( + delay: int = 10, + num_workers: int = NUM_INDEXING_WORKERS, + num_secondary_workers: int = NUM_SECONDARY_INDEXING_WORKERS, +) -> None: engine = get_sqlalchemy_engine() with Session(engine) as db_session: check_index_swap(db_session=db_session) - db_embedding_model = get_current_db_embedding_model(db_session) - - # So that the first time users aren't surprised by really slow speed of first - # batch of documents indexed - logger.info("Running a first inference to warm up embedding model") - warm_up_encoders( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - model_server_host=INDEXING_MODEL_SERVER_HOST, - model_server_port=MODEL_SERVER_PORT, - ) + search_settings = get_current_search_settings(db_session) + + # So that the first time users aren't surprised by really slow speed of first + # batch of documents indexed + + if search_settings.provider_type is None: + logger.notice("Running a first inference to warm up embedding model") + embedding_model = 
EmbeddingModel.from_db_model( + search_settings=search_settings, + server_host=INDEXING_MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ) + + warm_up_bi_encoder( + embedding_model=embedding_model, + ) client_primary: Client | SimpleJobClient client_secondary: Client | SimpleJobClient @@ -364,7 +414,7 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non silence_logs=logging.ERROR, ) cluster_secondary = LocalCluster( - n_workers=num_workers, + n_workers=num_secondary_workers, threads_per_worker=1, silence_logs=logging.ERROR, ) @@ -374,18 +424,18 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non client_primary.register_worker_plugin(ResourceLogger()) else: client_primary = SimpleJobClient(n_workers=num_workers) - client_secondary = SimpleJobClient(n_workers=num_workers) + client_secondary = SimpleJobClient(n_workers=num_secondary_workers) existing_jobs: dict[int, Future | SimpleJob] = {} while True: start = time.time() start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S") - logger.info(f"Running update, current UTC time: {start_time_utc}") + logger.debug(f"Running update, current UTC time: {start_time_utc}") if existing_jobs: # TODO: make this debug level once the "no jobs are being scheduled" issue is resolved - logger.info( + logger.debug( "Found existing indexing jobs: " f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}" ) @@ -409,8 +459,9 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non def update__main() -> None: set_is_ee_based_on_env_variable() + init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME) - logger.info("Starting Indexing Loop") + logger.notice("Starting indexing service") update_loop() diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py index 7e64a118e0d..b1e4132779b 100644 --- a/backend/danswer/chat/chat_utils.py +++ b/backend/danswer/chat/chat_utils.py @@ -35,14 +35,19 @@ def llm_doc_from_inference_section(inference_section: InferenceSection) -> LlmDo def create_chat_chain( chat_session_id: int, db_session: Session, + prefetch_tool_calls: bool = True, + # Optional id at which we finish processing + stop_at_message_id: int | None = None, ) -> tuple[ChatMessage, list[ChatMessage]]: """Build the linear chain of messages without including the root message""" mainline_messages: list[ChatMessage] = [] + all_chat_messages = get_chat_messages_by_session( chat_session_id=chat_session_id, user_id=None, db_session=db_session, skip_permission_check=True, + prefetch_tool_calls=prefetch_tool_calls, ) id_to_msg = {msg.id: msg for msg in all_chat_messages} @@ -58,7 +63,12 @@ def create_chat_chain( current_message: ChatMessage | None = root_message while current_message is not None: child_msg = current_message.latest_child_message - if not child_msg: + + # Break if at the end of the chain + # or have reached the `final_id` of the submitted message + if not child_msg or ( + stop_at_message_id and current_message.id == stop_at_message_id + ): break current_message = id_to_msg.get(child_msg) diff --git a/backend/danswer/chat/input_prompts.yaml b/backend/danswer/chat/input_prompts.yaml new file mode 100644 index 00000000000..cc7dbe78ea1 --- /dev/null +++ b/backend/danswer/chat/input_prompts.yaml @@ -0,0 +1,24 @@ +input_prompts: + - id: -5 + prompt: "Elaborate" + content: "Elaborate on the above, give me a more in depth explanation." 
+ active: true + is_public: true + + - id: -4 + prompt: "Reword" + content: "Help me rewrite the following politely and concisely for professional communication:\n" + active: true + is_public: true + + - id: -3 + prompt: "Email" + content: "Write a professional email for me including a subject line, signature, etc. Template the parts that need editing with [ ]. The email should cover the following points:\n" + active: true + is_public: true + + - id: -2 + prompt: "Debug" + content: "Provide step-by-step troubleshooting instructions for the following issue:\n" + active: true + is_public: true diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py index 342802e6c5c..0690f08b759 100644 --- a/backend/danswer/chat/load_yamls.py +++ b/backend/danswer/chat/load_yamls.py @@ -1,13 +1,17 @@ import yaml from sqlalchemy.orm import Session +from danswer.configs.chat_configs import INPUT_PROMPT_YAML from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT from danswer.configs.chat_configs import PERSONAS_YAML from danswer.configs.chat_configs import PROMPTS_YAML from danswer.db.document_set import get_or_create_document_set_by_name from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.input_prompt import insert_input_prompt_if_not_exists from danswer.db.models import DocumentSet as DocumentSetDBModel +from danswer.db.models import Persona from danswer.db.models import Prompt as PromptDBModel +from danswer.db.models import Tool as ToolDBModel from danswer.db.persona import get_prompt_by_name from danswer.db.persona import upsert_persona from danswer.db.persona import upsert_prompt @@ -76,9 +80,31 @@ def load_personas_from_yaml( prompt_ids = [prompt.id for prompt in prompts if prompt is not None] p_id = persona.get("id") + tool_ids = [] + if persona.get("image_generation"): + image_gen_tool = ( + db_session.query(ToolDBModel) + .filter(ToolDBModel.name == "ImageGenerationTool") + .first() + ) + if image_gen_tool: + tool_ids.append(image_gen_tool.id) + + llm_model_provider_override = persona.get("llm_model_provider_override") + llm_model_version_override = persona.get("llm_model_version_override") + + # Set specific overrides for image generation persona + if persona.get("image_generation"): + llm_model_version_override = "gpt-4o" + + existing_persona = ( + db_session.query(Persona) + .filter(Persona.name == persona["name"]) + .first() + ) + upsert_persona( user=None, - # Negative to not conflict with existing personas persona_id=(-1 * p_id) if p_id is not None else None, name=persona["name"], description=persona["description"], @@ -88,20 +114,52 @@ def load_personas_from_yaml( llm_relevance_filter=persona.get("llm_relevance_filter"), starter_messages=persona.get("starter_messages"), llm_filter_extraction=persona.get("llm_filter_extraction"), - llm_model_provider_override=None, - llm_model_version_override=None, + icon_shape=persona.get("icon_shape"), + icon_color=persona.get("icon_color"), + llm_model_provider_override=llm_model_provider_override, + llm_model_version_override=llm_model_version_override, recency_bias=RecencyBiasSetting(persona["recency_bias"]), prompt_ids=prompt_ids, document_set_ids=doc_set_ids, + tool_ids=tool_ids, default_persona=True, is_public=True, + display_priority=existing_persona.display_priority + if existing_persona is not None + else persona.get("display_priority"), + is_visible=existing_persona.is_visible + if existing_persona is not None + else persona.get("is_visible"), + db_session=db_session, + ) + + +def 
load_input_prompts_from_yaml(input_prompts_yaml: str = INPUT_PROMPT_YAML) -> None: + with open(input_prompts_yaml, "r") as file: + data = yaml.safe_load(file) + + all_input_prompts = data.get("input_prompts", []) + with Session(get_sqlalchemy_engine()) as db_session: + for input_prompt in all_input_prompts: + # If these prompts are deleted (which is a hard delete in the DB), on server startup + # they will be recreated, but the user can always just deactivate them, just a light inconvenience + insert_input_prompt_if_not_exists( + user=None, + input_prompt_id=input_prompt.get("id"), + prompt=input_prompt["prompt"], + content=input_prompt["content"], + is_public=input_prompt["is_public"], + active=input_prompt.get("active", True), db_session=db_session, + commit=True, ) def load_chat_yamls( prompt_yaml: str = PROMPTS_YAML, personas_yaml: str = PERSONAS_YAML, + input_prompts_yaml: str = INPUT_PROMPT_YAML, ) -> None: load_prompts_from_yaml(prompt_yaml) load_personas_from_yaml(personas_yaml) + load_input_prompts_from_yaml(input_prompts_yaml) diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index 7fc526a5cbe..6d12d68df08 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -9,6 +9,7 @@ from danswer.search.enums import SearchType from danswer.search.models import RetrievalDocs from danswer.search.models import SearchResponse +from danswer.tools.custom.base_tool_types import ToolResultType class LlmDoc(BaseModel): @@ -34,19 +35,37 @@ class QADocsResponse(RetrievalDocs): applied_time_cutoff: datetime | None recency_bias_multiplier: float - def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore - initial_dict = super().dict(*args, **kwargs) # type: ignore + def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + initial_dict = super().model_dump(mode="json", *args, **kwargs) # type: ignore initial_dict["applied_time_cutoff"] = ( self.applied_time_cutoff.isoformat() if self.applied_time_cutoff else None ) + return initial_dict -# Second chunk of info for streaming QA class LLMRelevanceFilterResponse(BaseModel): relevant_chunk_indices: list[int] +class RelevanceAnalysis(BaseModel): + relevant: bool + content: str | None = None + + +class SectionRelevancePiece(RelevanceAnalysis): + """LLM analysis mapped to an Inference Section""" + + document_id: str + chunk_id: int # ID of the center chunk for a given inference section + + +class DocumentRelevance(BaseModel): + """Contains all relevance information for a given search""" + + relevance_summaries: dict[str, RelevanceAnalysis] + + class DanswerAnswerPiece(BaseModel): # A small piece of a complete answer. Used for streaming back answers. 
answer_piece: str | None # if None, specifies the end of an Answer @@ -59,8 +78,14 @@ class CitationInfo(BaseModel): document_id: str +class MessageResponseIDInfo(BaseModel): + user_message_id: int | None + reserved_assistant_message_id: int + + class StreamingError(BaseModel): error: str + stack_trace: str | None = None class DanswerQuote(BaseModel): @@ -107,7 +132,7 @@ class ImageGenerationDisplay(BaseModel): class CustomToolResponse(BaseModel): - response: dict + response: ToolResultType tool_name: str diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml index 27dbb9f290f..9955b1d73c5 100644 --- a/backend/danswer/chat/personas.yaml +++ b/backend/danswer/chat/personas.yaml @@ -42,11 +42,15 @@ personas: # - "Engineer Onboarding" # - "Benefits" document_sets: [] + icon_shape: 23013 + icon_color: "#6FB1FF" + display_priority: 1 + is_visible: true - id: 1 - name: "GPT" + name: "General" description: > - Assistant with no access to documents. Chat with just the Language Model. + Assistant with no access to documents. Chat with just the Large Language Model. prompts: - "OnlyLLM" num_chunks: 0 @@ -54,6 +58,10 @@ personas: llm_filter_extraction: true recency_bias: "auto" document_sets: [] + icon_shape: 50910 + icon_color: "#FF6F6F" + display_priority: 0 + is_visible: true - id: 2 name: "Paraphrase" @@ -66,3 +74,25 @@ personas: llm_filter_extraction: true recency_bias: "auto" document_sets: [] + icon_shape: 45519 + icon_color: "#6FFF8D" + display_priority: 2 + is_visible: false + + + - id: 3 + name: "Art" + description: > + Assistant for generating images based on descriptions. + prompts: + - "ImageGeneration" + num_chunks: 0 + llm_relevance_filter: false + llm_filter_extraction: false + recency_bias: "no_decay" + document_sets: [] + icon_shape: 234124 + icon_color: "#9B59B6" + image_generation: true + display_priority: 3 + is_visible: true diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 1781c2232f1..2eea2cfc20f 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -1,3 +1,4 @@ +import traceback from collections.abc import Callable from collections.abc import Iterator from functools import partial @@ -11,6 +12,7 @@ from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import ImageGenerationDisplay from danswer.chat.models import LLMRelevanceFilterResponse +from danswer.chat.models import MessageResponseIDInfo from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError from danswer.configs.chat_configs import BING_API_KEY @@ -27,15 +29,16 @@ from danswer.db.chat import get_db_search_doc_by_id from danswer.db.chat import get_doc_query_identifiers_from_model from danswer.db.chat import get_or_create_root_message +from danswer.db.chat import reserve_message_id from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.chat import translate_db_search_doc_to_server_search_doc -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session_context_manager from danswer.db.llm import fetch_existing_llm_providers from danswer.db.models import SearchDoc as DbSearchDoc from danswer.db.models import ToolCall from danswer.db.models import User from danswer.db.persona import get_persona_by_id +from danswer.db.search_settings import get_current_search_settings from danswer.document_index.factory import get_default_document_index from 
danswer.file_store.models import ChatFileType from danswer.file_store.models import FileDescriptor @@ -51,7 +54,9 @@ from danswer.llm.factory import get_llms_for_persona from danswer.llm.factory import get_main_llm_from_tuple from danswer.llm.interfaces import LLMConfig -from danswer.llm.utils import get_default_llm_tokenizer +from danswer.llm.utils import litellm_exception_to_error_msg +from danswer.natural_language_processing.utils import get_tokenizer +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import QueryFlow from danswer.search.enums import SearchType @@ -60,6 +65,7 @@ from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.search.utils import dedupe_documents from danswer.search.utils import drop_llm_indices +from danswer.search.utils import relevant_sections_to_indices from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.utils import get_json_line @@ -178,7 +184,7 @@ def _handle_internet_search_tool_response_summary( rephrased_query=internet_search_response.revised_query, top_documents=response_docs, predicted_flow=QueryFlow.QUESTION_ANSWER, - predicted_search=SearchType.HYBRID, + predicted_search=SearchType.SEMANTIC, applied_source_filters=[], applied_time_cutoff=None, recency_bias_multiplier=1.0, @@ -187,37 +193,46 @@ def _handle_internet_search_tool_response_summary( ) -def _check_should_force_search( - new_msg_req: CreateChatMessageRequest, -) -> ForceUseTool | None: - # If files are already provided, don't run the search tool +def _get_force_search_settings( + new_msg_req: CreateChatMessageRequest, tools: list[Tool] +) -> ForceUseTool: + internet_search_available = any( + isinstance(tool, InternetSearchTool) for tool in tools + ) + search_tool_available = any(isinstance(tool, SearchTool) for tool in tools) + + if not internet_search_available and not search_tool_available: + # Does not matter much which tool is set here as force is false and neither tool is available + return ForceUseTool(force_use=False, tool_name=SearchTool._NAME) + + tool_name = SearchTool._NAME if search_tool_available else InternetSearchTool._NAME + # Currently, the internet search tool does not support query override + args = ( + {"query": new_msg_req.query_override} + if new_msg_req.query_override and tool_name == SearchTool._NAME + else None + ) + if new_msg_req.file_descriptors: - return None + # If user has uploaded files they're using, don't run any of the search tools + return ForceUseTool(force_use=False, tool_name=tool_name) - if ( - new_msg_req.query_override - or ( + should_force_search = any( + [ new_msg_req.retrieval_options - and new_msg_req.retrieval_options.run_search == OptionalSearchSetting.ALWAYS - ) - or new_msg_req.search_doc_ids - or DISABLE_LLM_CHOOSE_SEARCH - ): - args = ( - {"query": new_msg_req.query_override} - if new_msg_req.query_override - else None - ) - # if we are using selected docs, just put something here so the Tool doesn't need - # to build its own args via an LLM call - if new_msg_req.search_doc_ids: - args = {"query": new_msg_req.message} - - return ForceUseTool( - tool_name=SearchTool._NAME, - args=args, - ) - return None + and new_msg_req.retrieval_options.run_search + == OptionalSearchSetting.ALWAYS, + new_msg_req.search_doc_ids, + DISABLE_LLM_CHOOSE_SEARCH, + ] + ) + + if should_force_search: + # If we are using selected docs, just put something 
here so the Tool doesn't need to build its own args via an LLM call + args = {"query": new_msg_req.message} if new_msg_req.search_doc_ids else args + return ForceUseTool(force_use=True, tool_name=tool_name, args=args) + + return ForceUseTool(force_use=False, tool_name=tool_name, args=args) ChatPacket = ( @@ -229,6 +244,7 @@ def _check_should_force_search( | CitationInfo | ImageGenerationDisplay | CustomToolResponse + | MessageResponseIDInfo ) ChatPacketStream = Iterator[ChatPacket] @@ -244,17 +260,21 @@ def stream_chat_message_objects( max_document_percentage: float = CHAT_TARGET_CHUNK_PERCENTAGE, # if specified, uses the last user message and does not create a new user message based # on the `new_msg_req.message`. Currently, requires a state where the last message is a - # user message (e.g. this can only be used for the chat-seeding flow). use_existing_user_message: bool = False, litellm_additional_headers: dict[str, str] | None = None, + is_connected: Callable[[], bool] | None = None, ) -> ChatPacketStream: """Streams in order: 1. [conditional] Retrieved documents if a search needs to be run 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on 3. [always] A set of streamed LLM tokens or an error anywhere along the line if something fails 4. [always] Details on the final AI response message that is created - """ + # Currently surrounding context is not supported for chat + # Chat is already token heavy and harder for the model to process plus it would roll history over much faster + new_msg_req.chunks_above = 0 + new_msg_req.chunks_below = 0 + try: user_id = user.id if user is not None else None @@ -274,7 +294,10 @@ def stream_chat_message_objects( # use alternate persona if alternative assistant id is passed in if alternate_assistant_id is not None: persona = get_persona_by_id( - alternate_assistant_id, user=user, db_session=db_session + alternate_assistant_id, + user=user, + db_session=db_session, + is_for_edit=False, ) else: persona = chat_session.persona @@ -297,14 +320,20 @@ def stream_chat_message_objects( except GenAIDisabledException: raise RuntimeError("LLM is disabled. Can't use chat flow without LLM.") - llm_tokenizer = get_default_llm_tokenizer() + llm_provider = llm.config.model_provider + llm_model_name = llm.config.model_name + + llm_tokenizer = get_tokenizer( + model_name=llm_model_name, + provider_type=llm_provider, + ) llm_tokenizer_encode_func = cast( Callable[[str], list[int]], llm_tokenizer.encode ) - embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None + primary_index_name=search_settings.index_name, secondary_index_name=None ) # Every chat Session begins with an empty root message @@ -322,7 +351,15 @@ def stream_chat_message_objects( parent_message = root_message user_message = None - if not use_existing_user_message: + + if new_msg_req.regenerate: + final_msg, history_msgs = create_chat_chain( + stop_at_message_id=parent_id, + chat_session_id=chat_session_id, + db_session=db_session, + ) + + elif not use_existing_user_message: # Create new message at the right place in the tree and update the parent's child pointer # Don't commit yet until we verify the chat message chain user_message = create_new_chat_message( @@ -361,6 +398,14 @@ def stream_chat_message_objects( "when the last message is not a user message." 
) + # Disable Query Rephrasing for the first message + # This leads to a better first response since the LLM rephrasing the question + # leads to worst search quality + if not history_msgs: + new_msg_req.query_override = ( + new_msg_req.query_override or new_msg_req.message + ) + # load all files needed for this chat chain in memory files = load_all_chat_files( history_msgs, new_msg_req.file_descriptors, db_session @@ -420,9 +465,23 @@ def stream_chat_message_objects( else default_num_chunks ), max_window_percentage=max_document_percentage, - use_sections=new_msg_req.chunks_above > 0 - or new_msg_req.chunks_below > 0, ) + reserved_message_id = reserve_message_id( + db_session=db_session, + chat_session_id=chat_session_id, + parent_message=user_message.id + if user_message is not None + else parent_message.id, + message_type=MessageType.ASSISTANT, + ) + yield MessageResponseIDInfo( + user_message_id=user_message.id if user_message else None, + reserved_assistant_message_id=reserved_message_id, + ) + + overridden_model = ( + new_msg_req.llm_override.model_version if new_msg_req.llm_override else None + ) # Cannot determine these without the LLM step or breaking out early partial_response = partial( @@ -430,6 +489,7 @@ def stream_chat_message_objects( chat_session_id=chat_session_id, parent_message=final_msg, prompt_id=prompt_id, + overridden_model=overridden_model, # message=, # rephrased_query=, # token_count=, @@ -476,6 +536,9 @@ def stream_chat_message_objects( chunks_above=new_msg_req.chunks_above, chunks_below=new_msg_req.chunks_below, full_doc=new_msg_req.full_doc, + evaluation_type=LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP, ) tool_dict[db_tool_model.id] = [search_tool] elif tool_cls.__name__ == ImageGenerationTool.__name__: @@ -544,13 +607,16 @@ def stream_chat_message_objects( tools.extend(tool_list) # factor in tool definition size when pruning - document_pruning_config.tool_num_tokens = compute_all_tool_tokens(tools) + document_pruning_config.tool_num_tokens = compute_all_tool_tokens( + tools, llm_tokenizer + ) document_pruning_config.using_tool_message = explicit_tool_calling_supported( - llm.config.model_provider, llm.config.model_name + llm_provider, llm_model_name ) # LLM prompt building, response capturing, etc. answer = Answer( + is_connected=is_connected, question=final_msg.message, latest_query_files=latest_query_files, answer_style_config=AnswerStyleConfig( @@ -576,11 +642,7 @@ def stream_chat_message_objects( PreviousMessage.from_chat_message(msg, files) for msg in history_msgs ], tools=tools, - force_use_tool=( - _check_should_force_search(new_msg_req) - if search_tool and len(tools) == 1 - else None - ), + force_use_tool=_get_force_search_settings(new_msg_req, tools), ) reference_db_search_docs = None @@ -588,6 +650,7 @@ def stream_chat_message_objects( ai_message_files = None # any files to associate with the AI message e.g. 
dall-e generated images dropped_indices = None tool_result = None + for packet in answer.processed_streamed_output: if isinstance(packet, ToolResponse): if packet.id == SEARCH_RESPONSE_SUMMARY_ID: @@ -606,18 +669,28 @@ def stream_chat_message_objects( ) yield qa_docs_response elif packet.id == SECTION_RELEVANCE_LIST_ID: - chunk_indices = packet.response + relevance_sections = packet.response + + if reference_db_search_docs is not None: + llm_indices = relevant_sections_to_indices( + relevance_sections=relevance_sections, + items=[ + translate_db_search_doc_to_server_search_doc(doc) + for doc in reference_db_search_docs + ], + ) + + if dropped_indices: + llm_indices = drop_llm_indices( + llm_indices=llm_indices, + search_docs=reference_db_search_docs, + dropped_indices=dropped_indices, + ) - if reference_db_search_docs is not None and dropped_indices: - chunk_indices = drop_llm_indices( - llm_indices=chunk_indices, - search_docs=reference_db_search_docs, - dropped_indices=dropped_indices, + yield LLMRelevanceFilterResponse( + relevant_chunk_indices=llm_indices ) - yield LLMRelevanceFilterResponse( - relevant_chunk_indices=chunk_indices - ) elif packet.id == IMAGE_GENERATION_RESPONSE_ID: img_generation_response = cast( list[ImageGenerationResponse], packet.response @@ -653,20 +726,18 @@ def stream_chat_message_objects( if isinstance(packet, ToolCallFinalResult): tool_result = packet yield cast(ChatPacket, packet) - + logger.debug("Reached end of stream") except Exception as e: - logger.exception("Failed to process chat message") - - # Don't leak the API key error_msg = str(e) - if llm.config.api_key and llm.config.api_key.lower() in error_msg.lower(): - error_msg = ( - f"LLM failed to respond. Invalid API " - f"key error from '{llm.config.model_provider}'." 
- ) + logger.exception(f"Failed to process chat message: {error_msg}") + + stack_trace = traceback.format_exc() + client_error_msg = litellm_exception_to_error_msg(e, llm) + if llm.config.api_key and len(llm.config.api_key) > 2: + error_msg = error_msg.replace(llm.config.api_key, "[REDACTED_API_KEY]") + stack_trace = stack_trace.replace(llm.config.api_key, "[REDACTED_API_KEY]") - yield StreamingError(error=error_msg) - # Cancel the transaction so that no messages are saved + yield StreamingError(error=client_error_msg, stack_trace=stack_trace) db_session.rollback() return @@ -686,6 +757,7 @@ def stream_chat_message_objects( tool_name_to_tool_id[tool.name] = tool_id gen_ai_response_message = partial_response( + reserved_message_id=reserved_message_id, message=answer.llm_answer, rephrased_query=( qa_docs_response.rephrased_query if qa_docs_response else None @@ -706,6 +778,8 @@ def stream_chat_message_objects( if tool_result else [], ) + + logger.debug("Committing messages") db_session.commit() # actually save user / assistant message msg_detail_response = translate_db_message_to_chat_message_detail( @@ -714,7 +788,8 @@ def stream_chat_message_objects( yield msg_detail_response except Exception as e: - logger.exception(e) + error_msg = str(e) + logger.exception(error_msg) # Frontend will erase whatever answer and show this instead yield StreamingError(error="Failed to parse LLM output") @@ -726,6 +801,7 @@ def stream_chat_message( user: User | None, use_existing_user_message: bool = False, litellm_additional_headers: dict[str, str] | None = None, + is_connected: Callable[[], bool] | None = None, ) -> Iterator[str]: with get_session_context_manager() as db_session: objects = stream_chat_message_objects( @@ -734,6 +810,7 @@ def stream_chat_message( db_session=db_session, use_existing_user_message=use_existing_user_message, litellm_additional_headers=litellm_additional_headers, + is_connected=is_connected, ) for obj in objects: - yield get_json_line(obj.dict()) + yield get_json_line(obj.model_dump()) diff --git a/backend/danswer/chat/prompts.yaml b/backend/danswer/chat/prompts.yaml index 86b3b8baa75..b3b9bae6467 100644 --- a/backend/danswer/chat/prompts.yaml +++ b/backend/danswer/chat/prompts.yaml @@ -34,6 +34,23 @@ prompts: # Prompts the LLM to include citations in the for [1], [2] etc. # which get parsed to match the passed in sources include_citations: true + + - name: "ImageGeneration" + description: "Generates images based on user prompts!" + system: > + You are an advanced image generation system capable of creating diverse and detailed images. + + You can interpret user prompts and generate high-quality, creative images that match their descriptions. + + You always strive to create safe and appropriate content, avoiding any harmful or offensive imagery. + task: > + Generate an image based on the user's description. + + Provide a detailed description of the generated image, including key elements, colors, and composition. + + If the request is not possible or appropriate, explain why and suggest alternatives. + datetime_aware: true + include_citations: false - name: "OnlyLLM" description: "Chat directly with the LLM!" 
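A note on the process_message.py error-handling hunk above: instead of returning a canned "invalid API key" message, the new code strips the raw API key out of both the error string and the stack trace before streaming a StreamingError back to the client. A minimal standalone sketch of that redaction pattern (the helper name and the sample key are illustrative only, not from the codebase):

    import traceback

    REDACTED = "[REDACTED_API_KEY]"

    def redact_secret(text: str, secret: str | None) -> str:
        # Mirror the `len(api_key) > 2` guard from the diff: skip empty or
        # trivially short values that would over-redact unrelated text.
        if secret and len(secret) > 2:
            return text.replace(secret, REDACTED)
        return text

    try:
        raise RuntimeError("LLM call failed: api_key=sk-demo-123 was rejected")
    except Exception as e:
        api_key = "sk-demo-123"  # illustrative secret, stands in for llm.config.api_key
        error_msg = redact_secret(str(e), api_key)
        stack_trace = redact_secret(traceback.format_exc(), api_key)
        # Both strings are now safe to surface via StreamingError(error=..., stack_trace=...).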
diff --git a/backend/danswer/chat/tools.py b/backend/danswer/chat/tools.py index 717cead6308..11b40592973 100644 --- a/backend/danswer/chat/tools.py +++ b/backend/danswer/chat/tools.py @@ -1,4 +1,4 @@ -from typing import TypedDict +from typing_extensions import TypedDict # noreorder from pydantic import BaseModel diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 47cfee37fec..f6b218c5f56 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -93,6 +93,14 @@ SMTP_PASS = os.environ.get("SMTP_PASS", "your-gmail-password") EMAIL_FROM = os.environ.get("EMAIL_FROM") or SMTP_USER +# If set, Danswer will listen to the `expires_at` returned by the identity +# provider (e.g. Okta, Google, etc.) and force the user to re-authenticate +# after this time has elapsed. Disabled since by default many auth providers +# have very short expiry times (e.g. 1 hour) which provide a poor user experience +TRACK_EXTERNAL_IDP_EXPIRY = ( + os.environ.get("TRACK_EXTERNAL_IDP_EXPIRY", "").lower() == "true" +) + ##### # DB Configs @@ -129,6 +137,17 @@ POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" +# defaults to False +POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true" + +# recycle timeout in seconds +POSTGRES_POOL_RECYCLE_DEFAULT = 60 * 20 # 20 minutes +try: + POSTGRES_POOL_RECYCLE = int( + os.environ.get("POSTGRES_POOL_RECYCLE", POSTGRES_POOL_RECYCLE_DEFAULT) + ) +except ValueError: + POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT ##### # Connector Configs @@ -181,8 +200,8 @@ ] # Avoid to get archived pages -CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES = ( - os.environ.get("CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES", "").lower() == "true" +CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES = ( + os.environ.get("CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES", "").lower() == "true" ) # Save pages labels as Danswer metadata tags @@ -191,6 +210,16 @@ os.environ.get("CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING", "").lower() == "true" ) +# Attachments exceeding this size will not be retrieved (in bytes) +CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = int( + os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD", 10 * 1024 * 1024) +) +# Attachments with more chars than this will not be indexed. This is to prevent extremely +# large files from freezing indexing. 200,000 is ~100 google doc pages. +CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD = int( + os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD", 200_000) +) + JIRA_CONNECTOR_LABELS_TO_SKIP = [ ignored_tag for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",") @@ -212,10 +241,11 @@ os.environ.get("EXPERIMENTAL_CHECKPOINTING_ENABLED", "").lower() == "true" ) +PRUNING_DISABLED = -1 DEFAULT_PRUNING_FREQ = 60 * 60 * 24 # Once a day -PREVENT_SIMULTANEOUS_PRUNING = ( - os.environ.get("PREVENT_SIMULTANEOUS_PRUNING", "").lower() == "true" +ALLOW_SIMULTANEOUS_PRUNING = ( + os.environ.get("ALLOW_SIMULTANEOUS_PRUNING", "").lower() == "true" ) # This is the maxiumum rate at which documents are queried for a pruning job. 0 disables the limitation. 
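On the new Postgres pool settings added to app_configs.py above (POSTGRES_POOL_PRE_PING and POSTGRES_POOL_RECYCLE): these correspond to standard SQLAlchemy engine options. A rough sketch of how they would typically be consumed when building the engine, assuming the engine factory simply forwards them (the actual wiring lives in danswer.db.engine, which is not part of this diff; the connection URL is illustrative and a Postgres driver such as psycopg2 must be installed):

    import os

    from sqlalchemy import create_engine

    POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true"

    POSTGRES_POOL_RECYCLE_DEFAULT = 60 * 20  # 20 minutes
    try:
        POSTGRES_POOL_RECYCLE = int(
            os.environ.get("POSTGRES_POOL_RECYCLE", POSTGRES_POOL_RECYCLE_DEFAULT)
        )
    except ValueError:
        # Fall back to the default if the env var is set but not an integer.
        POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT

    engine = create_engine(
        "postgresql://user:pass@localhost:5432/postgres",  # illustrative URL
        # Validate pooled connections on checkout so stale ones are replaced transparently.
        pool_pre_ping=POSTGRES_POOL_PRE_PING,
        # Recycle connections older than this many seconds to avoid idle-timeout drops.
        pool_recycle=POSTGRES_POOL_RECYCLE,
    )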
@@ -223,6 +253,11 @@ os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0) ) +# comma delimited list of zendesk article labels to skip indexing for +ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS = os.environ.get( + "ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS", "" +).split(",") + ##### # Indexing Configs @@ -243,16 +278,39 @@ # fairly large amount of memory in order to increase substantially, since # each worker loads the embedding models into memory. NUM_INDEXING_WORKERS = int(os.environ.get("NUM_INDEXING_WORKERS") or 1) -CHUNK_OVERLAP = 0 +NUM_SECONDARY_INDEXING_WORKERS = int( + os.environ.get("NUM_SECONDARY_INDEXING_WORKERS") or NUM_INDEXING_WORKERS +) # More accurate results at the expense of indexing speed and index size (stores additional 4 MINI_CHUNK vectors) -ENABLE_MINI_CHUNK = os.environ.get("ENABLE_MINI_CHUNK", "").lower() == "true" +ENABLE_MULTIPASS_INDEXING = ( + os.environ.get("ENABLE_MULTIPASS_INDEXING", "").lower() == "true" +) # Finer grained chunking for more detail retention # Slightly larger since the sentence aware split is a max cutoff so most minichunks will be under MINI_CHUNK_SIZE # tokens. But we need it to be at least as big as 1/4th chunk size to avoid having a tiny mini-chunk at the end MINI_CHUNK_SIZE = 150 + +# This is the number of regular chunks per large chunk +LARGE_CHUNK_RATIO = 4 + +# Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out +# We don't want the metadata to overwhelm the actual contents of the chunk +SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true" # Timeout to wait for job's last update before killing it, in hours CLEANUP_INDEXING_JOBS_TIMEOUT = int(os.environ.get("CLEANUP_INDEXING_JOBS_TIMEOUT", 3)) +# The indexer will warn in the logs whenver a document exceeds this threshold (in bytes) +INDEXING_SIZE_WARNING_THRESHOLD = int( + os.environ.get("INDEXING_SIZE_WARNING_THRESHOLD", 100 * 1024 * 1024) +) + +# during indexing, will log verbose memory diff stats every x batches and at the end. +# 0 disables this behavior and is the default. +INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL", 0)) + +# During an indexing attempt, specifies the number of batches which are allowed to +# exception without aborting the attempt. 
+INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT", 0)) ##### # Miscellaneous @@ -280,6 +338,10 @@ os.environ.get("LOG_VESPA_TIMING_INFORMATION", "").lower() == "true" ) LOG_ENDPOINT_LATENCY = os.environ.get("LOG_ENDPOINT_LATENCY", "").lower() == "true" +LOG_POSTGRES_LATENCY = os.environ.get("LOG_POSTGRES_LATENCY", "").lower() == "true" +LOG_POSTGRES_CONN_COUNTS = ( + os.environ.get("LOG_POSTGRES_CONN_COUNTS", "").lower() == "true" +) # Anonymous usage telemetry DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true" diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py index 198793a0043..454412ff87e 100644 --- a/backend/danswer/configs/chat_configs.py +++ b/backend/danswer/configs/chat_configs.py @@ -3,12 +3,13 @@ PROMPTS_YAML = "./danswer/chat/prompts.yaml" PERSONAS_YAML = "./danswer/chat/personas.yaml" +INPUT_PROMPT_YAML = "./danswer/chat/input_prompts.yaml" NUM_RETURNED_HITS = os.environ.get("TOOL_SEARCH_NUM_RETURNED_HITS") or 50 # Used for LLM filtering and reranking # We want this to be approximately the number of results we want to show on the first page # It cannot be too large due to cost and latency implications -NUM_RERANKED_RESULTS = 20 +NUM_POSTPROCESSED_RESULTS = 20 # May be less depending on model MAX_CHUNKS_FED_TO_CHAT = float(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 10.0) @@ -30,13 +31,9 @@ DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak" # For the highest matching base size chunk, how many chunks above and below do we pull in by default # Note this is not in any of the deployment configs yet -CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 0) -CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 0) -# Whether the LLM should evaluate all of the document chunks passed in for usefulness -# in relation to the user query -DISABLE_LLM_CHUNK_FILTER = ( - os.environ.get("DISABLE_LLM_CHUNK_FILTER", "").lower() == "true" -) +# Currently only applies to search flow not chat +CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 1) +CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 1) # Whether the LLM should be used to decide if a search would help given the chat history DISABLE_LLM_CHOOSE_SEARCH = ( os.environ.get("DISABLE_LLM_CHOOSE_SEARCH", "").lower() == "true" @@ -47,22 +44,19 @@ # 1 edit per 20 characters, currently unused due to fuzzy match being too slow QUOTE_ALLOWED_ERROR_PERCENT = 0.05 QA_TIMEOUT = int(os.environ.get("QA_TIMEOUT") or "60") # 60 seconds -# Keyword Search Drop Stopwords -# If user has changed the default model, would most likely be to use a multilingual -# model, the stopwords are NLTK english stopwords so then we would want to not drop the keywords -if os.environ.get("EDIT_KEYWORD_QUERY"): - EDIT_KEYWORD_QUERY = os.environ.get("EDIT_KEYWORD_QUERY", "").lower() == "true" -else: - EDIT_KEYWORD_QUERY = not os.environ.get("DOCUMENT_ENCODER_MODEL") # Weighting factor between Vector and Keyword Search, 1 for completely vector search -HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.62))) +HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.5))) +HYBRID_ALPHA_KEYWORD = max( + 0, min(1, float(os.environ.get("HYBRID_ALPHA_KEYWORD") or 0.4)) +) # Weighting factor between Title and Content of documents during search, 1 for completely # Title based. Default heavily favors Content because Title is also included at the top of # Content. 
This is to avoid cases where the Content is very relevant but it may not be clear # if the title is separated out. Title is most of a "boost" than a separate field. TITLE_CONTENT_RATIO = max( - 0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.20)) + 0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.10)) ) + # A list of languages passed to the LLM to rephase the query # For example "English,French,Spanish", be sure to use the "," separator MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None @@ -75,6 +69,17 @@ or "The name of the conversation must be in the same language as the user query." ) +# Agentic search takes significantly more tokens and therefore has much higher cost. +# This configuration allows users to get a search-only experience with instant results +# and no involvement from the LLM. +# Additionally, some LLM providers have strict rate limits which may prohibit +# sending many API requests at once (as is done in agentic search). +# Whether the LLM should evaluate all of the document chunks passed in for usefulness +# in relation to the user query +DISABLE_LLM_DOC_RELEVANCE = ( + os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true" +) + # Stops streaming answers back to the UI if this pattern is seen: STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 1042d688532..64c162d7bef 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -1,25 +1,7 @@ +from enum import auto from enum import Enum -DOCUMENT_ID = "document_id" -CHUNK_ID = "chunk_id" -BLURB = "blurb" -CONTENT = "content" SOURCE_TYPE = "source_type" -SOURCE_LINKS = "source_links" -SOURCE_LINK = "link" -SEMANTIC_IDENTIFIER = "semantic_identifier" -TITLE = "title" -SKIP_TITLE_EMBEDDING = "skip_title" -SECTION_CONTINUATION = "section_continuation" -EMBEDDINGS = "embeddings" -TITLE_EMBEDDING = "title_embedding" -ALLOWED_USERS = "allowed_users" -ACCESS_CONTROL_LIST = "access_control_list" -DOCUMENT_SETS = "document_sets" -TIME_FILTER = "time_filter" -METADATA = "metadata" -METADATA_LIST = "metadata_list" -MATCH_HIGHLIGHTS = "match_highlights" # stored in the `metadata` of a chunk. Used to signify that this chunk should # not be used for QA. For example, Google Drive file types which can't be parsed # are still useful as a search result but not for QA. @@ -27,23 +9,12 @@ # NOTE: deprecated, only used for porting key from old system GEN_AI_API_KEY_STORAGE_KEY = "genai_api_key" PUBLIC_DOC_PAT = "PUBLIC" -PUBLIC_DOCUMENT_SET = "__PUBLIC" -QUOTE = "quote" -BOOST = "boost" -DOC_UPDATED_AT = "doc_updated_at" # Indexed as seconds since epoch -PRIMARY_OWNERS = "primary_owners" -SECONDARY_OWNERS = "secondary_owners" -RECENCY_BIAS = "recency_bias" -HIDDEN = "hidden" -SCORE = "score" ID_SEPARATOR = ":;:" DEFAULT_BOOST = 0 SESSION_KEY = "session" -QUERY_EVENT_ID = "query_event_id" -LLM_CHUNKS = "llm_chunks" # For chunking/processing chunks -TITLE_SEPARATOR = "\n\r\n" +RETURN_SEPARATOR = "\n\r\n" SECTION_SEPARATOR = "\n\n" # For combining attributes, doesn't have to be unique/perfect to work INDEX_SEPARATOR = "===" @@ -58,12 +29,37 @@ "You can still use Danswer as a search engine." 
) +# Postgres connection constants for application_name +POSTGRES_WEB_APP_NAME = "web" +POSTGRES_INDEXER_APP_NAME = "indexer" +POSTGRES_CELERY_APP_NAME = "celery" +POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat" +POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker" +POSTGRES_PERMISSIONS_APP_NAME = "permissions" +POSTGRES_UNKNOWN_APP_NAME = "unknown" # API Keys DANSWER_API_KEY_PREFIX = "API_KEY__" DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "danswerapikey.ai" UNNAMED_KEY_PLACEHOLDER = "Unnamed" +# Key-Value store keys +KV_REINDEX_KEY = "needs_reindexing" +KV_SEARCH_SETTINGS = "search_settings" +KV_USER_STORE_KEY = "INVITED_USERS" +KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" +KV_CRED_KEY = "credential_id_{}" +KV_GMAIL_CRED_KEY = "gmail_app_credential" +KV_GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" +KV_GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential" +KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" +KV_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key" +KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" +KV_SETTINGS_KEY = "danswer_settings" +KV_CUSTOMER_UUID_KEY = "customer_uuid" +KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" +KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" + class DocumentSource(str, Enum): # Special case, document passed in via Danswer APIs without specifying a source type @@ -107,6 +103,10 @@ class DocumentSource(str, Enum): NOT_APPLICABLE = "not_applicable" +class NotificationType(str, Enum): + REINDEX = "reindex" + + class BlobType(str, Enum): R2 = "r2" S3 = "s3" @@ -162,3 +162,7 @@ class FileOrigin(str, Enum): CONNECTOR = "connector" GENERATED_REPORT = "generated_report" OTHER = "other" + + +class PostgresAdvisoryLocks(Enum): + KOMBU_MESSAGE_CLEANUP_LOCK_ID = auto() diff --git a/backend/danswer/configs/danswerbot_configs.py b/backend/danswer/configs/danswerbot_configs.py index b75d69d1f13..3fca9bc78b3 100644 --- a/backend/danswer/configs/danswerbot_configs.py +++ b/backend/danswer/configs/danswerbot_configs.py @@ -73,3 +73,15 @@ DANSWER_BOT_REPHRASE_MESSAGE = ( os.environ.get("DANSWER_BOT_REPHRASE_MESSAGE", "").lower() == "true" ) + +# DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of +# responses DanswerBot can send in a given time period. +# Set to 0 to disable the limit. +DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD = int( + os.environ.get("DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD", "5000") +) +# DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS is the number +# of seconds until the response limit is reset. 
+DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS = int( + os.environ.get("DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS", "86400") +) diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index b4c0e8cf29e..9e323c2b539 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -12,13 +12,15 @@ # The useable models configured as below must be SentenceTransformer compatible # NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING # IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI -DEFAULT_DOCUMENT_ENCODER_MODEL = "intfloat/e5-base-v2" +DEFAULT_DOCUMENT_ENCODER_MODEL = "nomic-ai/nomic-embed-text-v1" DOCUMENT_ENCODER_MODEL = ( os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL ) # If the below is changed, Vespa deployment must also be changed DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768) # Model should be chosen with 512 context size, ideally don't change this +# If multipass_indexing is enabled, the max context size would be set to +# DOC_EMBEDDING_CONTEXT_SIZE * LARGE_CHUNK_RATIO DOC_EMBEDDING_CONTEXT_SIZE = 512 NORMALIZE_EMBEDDINGS = ( os.environ.get("NORMALIZE_EMBEDDINGS") or "true" @@ -34,17 +36,16 @@ SIM_SCORE_RANGE_LOW = float(os.environ.get("SIM_SCORE_RANGE_LOW") or 0.0) SIM_SCORE_RANGE_HIGH = float(os.environ.get("SIM_SCORE_RANGE_HIGH") or 1.0) # Certain models like e5, BGE, etc use a prefix for asymmetric retrievals (query generally shorter than docs) -ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "query: ") -ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "passage: ") +ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ") +ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ") # Purely an optimization, memory limitation consideration BATCH_SIZE_ENCODE_CHUNKS = 8 +# don't send over too many chunks at once, as sending too many could cause timeouts +BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512 # For score display purposes, only way is to know the expected ranges CROSS_ENCODER_RANGE_MAX = 1 CROSS_ENCODER_RANGE_MIN = 0 -# Unused currently, can't be used with the current default encoder model due to its output range -SEARCH_DISTANCE_CUTOFF = 0 - ##### # Generative AI Model Configs @@ -79,8 +80,16 @@ GEN_AI_LLM_PROVIDER_TYPE = os.environ.get("GEN_AI_LLM_PROVIDER_TYPE") or None # Override the auto-detection of LLM max context length GEN_AI_MAX_TOKENS = int(os.environ.get("GEN_AI_MAX_TOKENS") or 0) or None + # Set this to be enough for an answer + quotes. 
Also used for Chat -GEN_AI_MAX_OUTPUT_TOKENS = int(os.environ.get("GEN_AI_MAX_OUTPUT_TOKENS") or 1024) +# This is the minimum token context we will leave for the LLM to generate an answer +GEN_AI_NUM_RESERVED_OUTPUT_TOKENS = int( + os.environ.get("GEN_AI_NUM_RESERVED_OUTPUT_TOKENS") or 1024 +) + +# Typically, GenAI models nowadays are at least 4K tokens +GEN_AI_MODEL_FALLBACK_MAX_TOKENS = 4096 + # Number of tokens from chat history to include at maximum # 3000 should be enough context regardless of use, no need to include as much as possible # as this drives up the cost unnecessarily diff --git a/backend/danswer/connectors/blob/connector.py b/backend/danswer/connectors/blob/connector.py index 2446bfd1666..a664a3d764a 100644 --- a/backend/danswer/connectors/blob/connector.py +++ b/backend/danswer/connectors/blob/connector.py @@ -56,7 +56,7 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None Raises ValueError for unsupported bucket types. """ - logger.info( + logger.debug( f"Loading credentials for {self.bucket_name} or type {self.bucket_type}" ) @@ -169,7 +169,7 @@ def _yield_blob_objects( end: datetime, ) -> GenerateDocumentsOutput: if self.s3_client is None: - raise ConnectorMissingCredentialError("Blog storage") + raise ConnectorMissingCredentialError("Blob storage") paginator = self.s3_client.get_paginator("list_objects_v2") pages = paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix) @@ -220,7 +220,7 @@ def _yield_blob_objects( yield batch def load_from_state(self) -> GenerateDocumentsOutput: - logger.info("Loading blob objects") + logger.debug("Loading blob objects") return self._yield_blob_objects( start=datetime(1970, 1, 1, tzinfo=timezone.utc), end=datetime.now(timezone.utc), @@ -230,7 +230,7 @@ def poll_source( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch ) -> GenerateDocumentsOutput: if self.s3_client is None: - raise ConnectorMissingCredentialError("Blog storage") + raise ConnectorMissingCredentialError("Blob storage") start_datetime = datetime.fromtimestamp(start, tz=timezone.utc) end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 1c348df6a3b..b8dc967a3d9 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -13,7 +13,11 @@ from atlassian import Confluence # type:ignore from requests import HTTPError -from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES +from danswer.configs.app_configs import ( + CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD, +) +from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD +from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE @@ -41,6 +45,14 @@ # 2. 
Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost +NO_PERMISSIONS_TO_VIEW_ATTACHMENTS_ERROR_STR = ( + "User not permitted to view attachments on content" +) +NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = ( + "No parent or not permitted to view content with id" +) + + def _extract_confluence_keys_from_cloud_url(wiki_url: str) -> tuple[str, str, str]: """Sample URL w/ page: https://danswer.atlassian.net/wiki/spaces/1234abcd/pages/5678efgh/overview @@ -199,34 +211,56 @@ def _comment_dfs( comments_str += "\nComment:\n" + parse_html_page( comment_html, confluence_client ) - child_comment_pages = get_page_child_by_type( - comment_page["id"], - type="comment", - start=None, - limit=None, - expand="body.storage.value", - ) - comments_str = _comment_dfs( - comments_str, child_comment_pages, confluence_client - ) + try: + child_comment_pages = get_page_child_by_type( + comment_page["id"], + type="comment", + start=None, + limit=None, + expand="body.storage.value", + ) + comments_str = _comment_dfs( + comments_str, child_comment_pages, confluence_client + ) + except HTTPError as e: + # not the cleanest, but I'm not aware of a nicer way to check the error + if NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR not in str(e): + raise + return comments_str +def _datetime_from_string(datetime_string: str) -> datetime: + datetime_object = datetime.fromisoformat(datetime_string) + + if datetime_object.tzinfo is None: + # If no timezone info, assume it is UTC + datetime_object = datetime_object.replace(tzinfo=timezone.utc) + else: + # If not in UTC, translate it + datetime_object = datetime_object.astimezone(timezone.utc) + + return datetime_object + + class RecursiveIndexer: def __init__( self, batch_size: int, confluence_client: Confluence, - index_origin: bool, + index_recursively: bool, origin_page_id: str, ) -> None: self.batch_size = 1 # batch_size self.confluence_client = confluence_client - self.index_origin = index_origin + self.index_recursively = index_recursively self.origin_page_id = origin_page_id self.pages = self.recurse_children_pages(0, self.origin_page_id) + def get_origin_page(self) -> list[dict[str, Any]]: + return [self._fetch_origin_page()] + def get_pages(self, ind: int, size: int) -> list[dict]: if ind * size > len(self.pages): return [] @@ -282,12 +316,11 @@ def recurse_children_pages( current_level_pages = next_level_pages next_level_pages = [] - if self.index_origin: - try: - origin_page = self._fetch_origin_page() - pages.append(origin_page) - except Exception as e: - logger.warning(f"Appending origin page with id {page_id} failed: {e}") + try: + origin_page = self._fetch_origin_page() + pages.append(origin_page) + except Exception as e: + logger.warning(f"Appending origin page with id {page_id} failed: {e}") return pages @@ -340,7 +373,7 @@ class ConfluenceConnector(LoadConnector, PollConnector): def __init__( self, wiki_page_url: str, - index_origin: bool = True, + index_recursively: bool = True, batch_size: int = INDEX_BATCH_SIZE, continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE, # if a page has one of the labels specified in this list, we will just @@ -352,7 +385,7 @@ def __init__( self.continue_on_failure = continue_on_failure self.labels_to_skip = set(labels_to_skip) self.recursive_indexer: RecursiveIndexer | None = None - self.index_origin = index_origin + self.index_recursively = index_recursively ( self.wiki_base, self.space, @@ -369,7 +402,7 @@ def __init__( logger.info( f"wiki_base: {self.wiki_base}, space: 
{self.space}, page_id: {self.page_id}," - + f" space_level_scan: {self.space_level_scan}, origin: {self.index_origin}" + + f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively}" ) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: @@ -400,9 +433,7 @@ def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]: start=start_ind, limit=batch_size, status=( - "current" - if CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES - else None + None if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES else "current" ), expand="body.storage.value,version", ) @@ -423,9 +454,9 @@ def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]: start=start_ind + i, limit=1, status=( - "current" - if CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES - else None + None + if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES + else "current" ), expand="body.storage.value,version", ) @@ -453,10 +484,13 @@ def _fetch_page(start_ind: int, batch_size: int) -> list[dict[str, Any]]: origin_page_id=self.page_id, batch_size=self.batch_size, confluence_client=self.confluence_client, - index_origin=self.index_origin, + index_recursively=self.index_recursively, ) - return self.recursive_indexer.get_pages(start_ind, batch_size) + if self.index_recursively: + return self.recursive_indexer.get_pages(start_ind, batch_size) + else: + return self.recursive_indexer.get_origin_page() pages: list[dict[str, Any]] = [] @@ -529,134 +563,249 @@ def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str logger.exception("Ran into exception when fetching labels from Confluence") return [] + @classmethod + def _attachment_to_download_link( + cls, confluence_client: Confluence, attachment: dict[str, Any] + ) -> str: + return confluence_client.url + attachment["_links"]["download"] + + @classmethod + def _attachment_to_content( + cls, + confluence_client: Confluence, + attachment: dict[str, Any], + ) -> str | None: + """If it returns None, assume that we should skip this attachment.""" + if attachment["metadata"]["mediaType"] in [ + "image/jpeg", + "image/png", + "image/gif", + "image/svg+xml", + "video/mp4", + "video/quicktime", + ]: + return None + + download_link = cls._attachment_to_download_link(confluence_client, attachment) + + attachment_size = attachment["extensions"]["fileSize"] + if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD: + logger.warning( + f"Skipping {download_link} due to size. " + f"size={attachment_size} " + f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}" + ) + return None + + response = confluence_client._session.get(download_link) + if response.status_code != 200: + logger.warning( + f"Failed to fetch {download_link} with invalid status code {response.status_code}" + ) + return None + + extracted_text = extract_file_text( + attachment["title"], io.BytesIO(response.content), False + ) + if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD: + logger.warning( + f"Skipping {download_link} due to char count. 
" + f"char count={len(extracted_text)} " + f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD}" + ) + return None + + return extracted_text + def _fetch_attachments( self, confluence_client: Confluence, page_id: str, files_in_used: list[str] - ) -> str: + ) -> tuple[str, list[dict[str, Any]]]: + unused_attachments: list = [] + get_attachments_from_content = make_confluence_call_handle_rate_limit( confluence_client.get_attachments_from_content ) files_attachment_content: list = [] try: + expand = "history.lastUpdated,metadata.labels" attachments_container = get_attachments_from_content( - page_id, start=0, limit=500 + page_id, start=0, limit=500, expand=expand ) for attachment in attachments_container["results"]: - if attachment["metadata"]["mediaType"] in [ - "image/jpeg", - "image/png", - "image/gif", - "image/svg+xml", - "video/mp4", - "video/quicktime", - ]: - continue - if attachment["title"] not in files_in_used: + unused_attachments.append(attachment) continue - download_link = confluence_client.url + attachment["_links"]["download"] - response = confluence_client._session.get(download_link) - - if response.status_code == 200: - extract = extract_file_text( - attachment["title"], io.BytesIO(response.content), False - ) - files_attachment_content.append(extract) + attachment_content = self._attachment_to_content( + confluence_client, attachment + ) + if attachment_content: + files_attachment_content.append(attachment_content) except Exception as e: + if isinstance( + e, HTTPError + ) and NO_PERMISSIONS_TO_VIEW_ATTACHMENTS_ERROR_STR in str(e): + logger.warning( + f"User does not have access to attachments on page '{page_id}'" + ) + return "", [] + if not self.continue_on_failure: raise e logger.exception( f"Ran into exception when fetching attachments from Confluence: {e}" ) - return "\n".join(files_attachment_content) + return "\n".join(files_attachment_content), unused_attachments def _get_doc_batch( self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None - ) -> tuple[list[Document], int]: + ) -> tuple[list[Document], list[dict[str, Any]], int]: doc_batch: list[Document] = [] + unused_attachments: list[dict[str, Any]] = [] if self.confluence_client is None: raise ConnectorMissingCredentialError("Confluence") batch = self._fetch_pages(self.confluence_client, start_ind) for page in batch: - last_modified_str = page["version"]["when"] + last_modified = _datetime_from_string(page["version"]["when"]) author = cast(str | None, page["version"].get("by", {}).get("email")) - last_modified = datetime.fromisoformat(last_modified_str) - if last_modified.tzinfo is None: - # If no timezone info, assume it is UTC - last_modified = last_modified.replace(tzinfo=timezone.utc) - else: - # If not in UTC, translate it - last_modified = last_modified.astimezone(timezone.utc) - - if time_filter is None or time_filter(last_modified): - page_id = page["id"] - - if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: - page_labels = self._fetch_labels(self.confluence_client, page_id) - - # check disallowed labels - if self.labels_to_skip: - label_intersection = self.labels_to_skip.intersection(page_labels) - if label_intersection: - logger.info( - f"Page with ID '{page_id}' has a label which has been " - f"designated as disallowed: {label_intersection}. Skipping." 
- ) + if time_filter and not time_filter(last_modified): + continue - continue + page_id = page["id"] + + if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: + page_labels = self._fetch_labels(self.confluence_client, page_id) + + # check disallowed labels + if self.labels_to_skip: + label_intersection = self.labels_to_skip.intersection(page_labels) + if label_intersection: + logger.info( + f"Page with ID '{page_id}' has a label which has been " + f"designated as disallowed: {label_intersection}. Skipping." + ) - page_html = ( - page["body"] - .get("storage", page["body"].get("view", {})) - .get("value") - ) - page_url = self.wiki_base + page["_links"]["webui"] - if not page_html: - logger.debug("Page is empty, skipping: %s", page_url) continue - page_text = parse_html_page(page_html, self.confluence_client) - files_in_used = get_used_attachments(page_html, self.confluence_client) - attachment_text = self._fetch_attachments( - self.confluence_client, page_id, files_in_used + page_html = ( + page["body"].get("storage", page["body"].get("view", {})).get("value") + ) + page_url = self.wiki_base + page["_links"]["webui"] + if not page_html: + logger.debug("Page is empty, skipping: %s", page_url) + continue + page_text = parse_html_page(page_html, self.confluence_client) + + files_in_used = get_used_attachments(page_html, self.confluence_client) + attachment_text, unused_page_attachments = self._fetch_attachments( + self.confluence_client, page_id, files_in_used + ) + unused_attachments.extend(unused_page_attachments) + + page_text += attachment_text + comments_text = self._fetch_comments(self.confluence_client, page_id) + page_text += comments_text + doc_metadata: dict[str, str | list[str]] = {"Wiki Space Name": self.space} + if not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING and page_labels: + doc_metadata["labels"] = page_labels + + doc_batch.append( + Document( + id=page_url, + sections=[Section(link=page_url, text=page_text)], + source=DocumentSource.CONFLUENCE, + semantic_identifier=page["title"], + doc_updated_at=last_modified, + primary_owners=( + [BasicExpertInfo(email=author)] if author else None + ), + metadata=doc_metadata, ) - page_text += attachment_text - comments_text = self._fetch_comments(self.confluence_client, page_id) - page_text += comments_text - doc_metadata: dict[str, str | list[str]] = { - "Wiki Space Name": self.space - } - if not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING and page_labels: - doc_metadata["labels"] = page_labels - - doc_batch.append( - Document( - id=page_url, - sections=[Section(link=page_url, text=page_text)], - source=DocumentSource.CONFLUENCE, - semantic_identifier=page["title"], - doc_updated_at=last_modified, - primary_owners=( - [BasicExpertInfo(email=author)] if author else None - ), - metadata=doc_metadata, - ) + ) + return ( + doc_batch, + unused_attachments, + len(batch), + ) + + def _get_attachment_batch( + self, + start_ind: int, + attachments: list[dict[str, Any]], + time_filter: Callable[[datetime], bool] | None = None, + ) -> tuple[list[Document], int]: + doc_batch: list[Document] = [] + + if self.confluence_client is None: + raise ConnectorMissingCredentialError("Confluence") + + end_ind = min(start_ind + self.batch_size, len(attachments)) + + for attachment in attachments[start_ind:end_ind]: + last_updated = _datetime_from_string( + attachment["history"]["lastUpdated"]["when"] + ) + + if time_filter and not time_filter(last_updated): + continue + + attachment_url = self._attachment_to_download_link( + 
self.confluence_client, attachment + ) + attachment_content = self._attachment_to_content( + self.confluence_client, attachment + ) + if attachment_content is None: + continue + + creator_email = attachment["history"]["createdBy"].get("email") + + comment = attachment["metadata"].get("comment", "") + doc_metadata: dict[str, str | list[str]] = {"comment": comment} + + attachment_labels: list[str] = [] + if not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: + for label in attachment["metadata"]["labels"]["results"]: + attachment_labels.append(label["name"]) + + doc_metadata["labels"] = attachment_labels + + doc_batch.append( + Document( + id=attachment_url, + sections=[Section(link=attachment_url, text=attachment_content)], + source=DocumentSource.CONFLUENCE, + semantic_identifier=attachment["title"], + doc_updated_at=last_updated, + primary_owners=( + [BasicExpertInfo(email=creator_email)] + if creator_email + else None + ), + metadata=doc_metadata, ) - return doc_batch, len(batch) + ) + + return doc_batch, end_ind - start_ind def load_from_state(self) -> GenerateDocumentsOutput: + unused_attachments = [] + if self.confluence_client is None: raise ConnectorMissingCredentialError("Confluence") start_ind = 0 while True: - doc_batch, num_pages = self._get_doc_batch(start_ind) + doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( + start_ind + ) + unused_attachments.extend(unused_attachments_batch) start_ind += num_pages if doc_batch: yield doc_batch @@ -664,9 +813,23 @@ def load_from_state(self) -> GenerateDocumentsOutput: if num_pages < self.batch_size: break + start_ind = 0 + while True: + attachment_batch, num_attachments = self._get_attachment_batch( + start_ind, unused_attachments + ) + start_ind += num_attachments + if attachment_batch: + yield attachment_batch + + if num_attachments < self.batch_size: + break + def poll_source( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch ) -> GenerateDocumentsOutput: + unused_attachments = [] + if self.confluence_client is None: raise ConnectorMissingCredentialError("Confluence") @@ -675,9 +838,11 @@ def poll_source( start_ind = 0 while True: - doc_batch, num_pages = self._get_doc_batch( + doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( start_ind, time_filter=lambda t: start_time <= t <= end_time ) + unused_attachments.extend(unused_attachments_batch) + start_ind += num_pages if doc_batch: yield doc_batch @@ -685,6 +850,20 @@ def poll_source( if num_pages < self.batch_size: break + start_ind = 0 + while True: + attachment_batch, num_attachments = self._get_attachment_batch( + start_ind, + unused_attachments, + time_filter=lambda t: start_time <= t <= end_time, + ) + start_ind += num_attachments + if attachment_batch: + yield attachment_batch + + if num_attachments < self.batch_size: + break + if __name__ == "__main__": connector = ConfluenceConnector(os.environ["CONFLUENCE_TEST_SPACE_URL"]) diff --git a/backend/danswer/connectors/confluence/rate_limit_handler.py b/backend/danswer/connectors/confluence/rate_limit_handler.py index b9481d6bd46..8755b78f3f4 100644 --- a/backend/danswer/connectors/confluence/rate_limit_handler.py +++ b/backend/danswer/connectors/confluence/rate_limit_handler.py @@ -1,10 +1,14 @@ +import time from collections.abc import Callable from typing import Any from typing import cast from typing import TypeVar from requests import HTTPError -from retry import retry + +from danswer.utils.logger import setup_logger + +logger = setup_logger() F = TypeVar("F", bound=Callable[..., Any]) 
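Note for reviewers: the rate limit handler hunk below drops the `retry` decorator in favor of a hand-rolled loop that sleeps for the server-supplied `Retry-After` value when present and otherwise falls back to capped exponential backoff. A minimal standalone sketch of that pattern follows; the exception class, default numbers, and jitter here are illustrative stand-ins rather than the connector's exact code.

import random
import time


class RateLimited(Exception):
    """Stand-in for an HTTP 429 response; retry_after mirrors the Retry-After header."""

    def __init__(self, retry_after: float | None = None) -> None:
        super().__init__("rate limited")
        self.retry_after = retry_after


def call_with_backoff(fn, max_retries: int = 10, base_delay: float = 5.0, max_delay: float = 600.0):
    for attempt in range(max_retries - 1):
        try:
            return fn()
        except RateLimited as e:
            # Prefer the wait the server asked for; otherwise back off 5s, 10s, 20s, ... capped at max_delay
            delay = e.retry_after or min(base_delay * (2**attempt), max_delay)
            time.sleep(delay + random.uniform(0, 1))  # small jitter to avoid synchronized retries
    return fn()  # final attempt: let any exception propagate to the caller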
@@ -18,23 +22,48 @@ class ConfluenceRateLimitError(Exception): def make_confluence_call_handle_rate_limit(confluence_call: F) -> F: - @retry( - exceptions=ConfluenceRateLimitError, - tries=10, - delay=1, - max_delay=600, # 10 minutes - backoff=2, - jitter=1, - ) def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: - try: - return confluence_call(*args, **kwargs) - except HTTPError as e: - if ( - e.response.status_code == 429 - or RATE_LIMIT_MESSAGE_LOWERCASE in e.response.text.lower() - ): - raise ConfluenceRateLimitError() - raise + max_retries = 10 + starting_delay = 5 + backoff = 2 + max_delay = 600 + + for attempt in range(max_retries): + try: + return confluence_call(*args, **kwargs) + except HTTPError as e: + if ( + e.response.status_code == 429 + or RATE_LIMIT_MESSAGE_LOWERCASE in e.response.text.lower() + ): + retry_after = None + try: + retry_after = int(e.response.headers.get("Retry-After")) + except (ValueError, TypeError): + pass + + if retry_after: + logger.warning( + f"Rate limit hit. Retrying after {retry_after} seconds..." + ) + time.sleep(retry_after) + else: + logger.warning( + "Rate limit hit. Retrying with exponential backoff..." + ) + delay = min(starting_delay * (backoff**attempt), max_delay) + time.sleep(delay) + else: + # re-raise, let caller handle + raise + except AttributeError as e: + # Some error within the Confluence library, unclear why it fails. + # Users reported it to be intermittent, so just retry + logger.warning(f"Confluence Internal Error, retrying... {e}") + delay = min(starting_delay * (backoff**attempt), max_delay) + time.sleep(delay) + + if attempt == max_retries - 1: + raise e return cast(F, wrapped_call) diff --git a/backend/danswer/connectors/connector_runner.py b/backend/danswer/connectors/connector_runner.py new file mode 100644 index 00000000000..e5ad478fb7f --- /dev/null +++ b/backend/danswer/connectors/connector_runner.py @@ -0,0 +1,70 @@ +import sys +from datetime import datetime + +from danswer.connectors.interfaces import BaseConnector +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +TimeRange = tuple[datetime, datetime] + + +class ConnectorRunner: + def __init__( + self, + connector: BaseConnector, + time_range: TimeRange | None = None, + fail_loudly: bool = False, + ): + self.connector = connector + + if isinstance(self.connector, PollConnector): + if time_range is None: + raise ValueError("time_range is required for PollConnector") + + self.doc_batch_generator = self.connector.poll_source( + time_range[0].timestamp(), time_range[1].timestamp() + ) + + elif isinstance(self.connector, LoadConnector): + if time_range and fail_loudly: + raise ValueError( + "time_range specified, but passed in connector is not a PollConnector" + ) + + self.doc_batch_generator = self.connector.load_from_state() + + else: + raise ValueError(f"Invalid connector. 
type: {type(self.connector)}") + + def run(self) -> GenerateDocumentsOutput: + """Adds additional exception logging to the connector.""" + try: + yield from self.doc_batch_generator + except Exception: + exc_type, _, exc_traceback = sys.exc_info() + + # Traverse the traceback to find the last frame where the exception was raised + tb = exc_traceback + if tb is None: + logger.error("No traceback found for exception") + raise + + while tb.tb_next: + tb = tb.tb_next # Move to the next frame in the traceback + + # Get the local variables from the frame where the exception occurred + local_vars = tb.tb_frame.f_locals + local_vars_str = "\n".join( + f"{key}: {value}" for key, value in local_vars.items() + ) + logger.error( + f"Error in connector. type: {exc_type};\n" + f"local_vars below -> \n{local_vars_str}" + ) + raise diff --git a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py index 8faf6bfadaf..897503dca99 100644 --- a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py +++ b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py @@ -6,6 +6,7 @@ from dateutil.parser import parse +from danswer.configs.constants import IGNORE_FOR_QA from danswer.connectors.models import BasicExpertInfo from danswer.utils.text_processing import is_valid_email @@ -57,3 +58,7 @@ def process_in_batches( ) -> Iterator[list[U]]: for i in range(0, len(objects), batch_size): yield [process_function(obj) for obj in objects[i : i + batch_size]] + + +def get_metadata_keys_to_ignore() -> list[str]: + return [IGNORE_FOR_QA] diff --git a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py index 8733ca66e46..e3eeaaf617d 100644 --- a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py +++ b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py @@ -56,7 +56,7 @@ def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any: sleep_cnt = 0 while len(self.call_history) == self.max_calls: sleep_time = self.sleep_time * (self.sleep_backoff**sleep_cnt) - logger.info( + logger.notice( f"Rate limit exceeded for function {func.__name__}. " f"Waiting {sleep_time} seconds before retrying." ) diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index 212035901ab..9a8fbb31501 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -56,6 +56,16 @@ def extract_text_from_content(content: dict) -> str: return " ".join(texts) +def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any: + if hasattr(jira_issue.fields, field): + return getattr(jira_issue.fields, field) + + try: + return jira_issue.raw["fields"][field] + except Exception: + return None + + def _get_comment_strs( jira: Issue, comment_email_blacklist: tuple[str, ...] 
= () ) -> list[str]: @@ -117,8 +127,10 @@ def fetch_jira_issues_batch( continue comments = _get_comment_strs(jira, comment_email_blacklist) - semantic_rep = f"{jira.fields.description}\n" + "\n".join( - [f"Comment: {comment}" for comment in comments] + semantic_rep = ( + (f"{jira.fields.description}\n" if jira.fields.description else "") + + "\n".join([f"Comment: {comment}" for comment in comments]) ) page_url = f"{jira_client.client_info()}/browse/{jira.key}" @@ -147,14 +159,18 @@ def fetch_jira_issues_batch( pass metadata_dict = {} - if jira.fields.priority: - metadata_dict["priority"] = jira.fields.priority.name - if jira.fields.status: - metadata_dict["status"] = jira.fields.status.name - if jira.fields.resolution: - metadata_dict["resolution"] = jira.fields.resolution.name - if jira.fields.labels: - metadata_dict["label"] = jira.fields.labels + priority = best_effort_get_field_from_issue(jira, "priority") + if priority: + metadata_dict["priority"] = priority.name + status = best_effort_get_field_from_issue(jira, "status") + if status: + metadata_dict["status"] = status.name + resolution = best_effort_get_field_from_issue(jira, "resolution") + if resolution: + metadata_dict["resolution"] = resolution.name + labels = best_effort_get_field_from_issue(jira, "labels") + if labels: + metadata_dict["label"] = labels doc_batch.append( Document( diff --git a/backend/danswer/connectors/discourse/connector.py b/backend/danswer/connectors/discourse/connector.py index a637ff78c44..d74aad0f276 100644 --- a/backend/danswer/connectors/discourse/connector.py +++ b/backend/danswer/connectors/discourse/connector.py @@ -11,6 +11,9 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( + rate_limit_builder, +) from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import PollConnector @@ -58,67 +61,36 @@ def __init__( self.category_id_map: dict[int, str] = {} self.batch_size = batch_size - self.permissions: DiscoursePerms | None = None + self.active_categories: set | None = None + + @rate_limit_builder(max_calls=50, period=60) + def _make_request(self, endpoint: str, params: dict | None = None) -> Response: + if not self.permissions: + raise ConnectorMissingCredentialError("Discourse") + return discourse_request(endpoint, self.permissions, params) def _get_categories_map( self, ) -> None: assert self.permissions is not None categories_endpoint = urllib.parse.urljoin(self.base_url, "categories.json") - response = discourse_request( + response = self._make_request( endpoint=categories_endpoint, - perms=self.permissions, params={"include_subcategories": True}, ) categories = response.json()["category_list"]["categories"] - self.category_id_map = { - category["id"]: category["name"] - for category in categories - if not self.categories or category["name"].lower() in self.categories + cat["id"]: cat["name"] + for cat in categories + if not self.categories or cat["name"].lower() in self.categories } - - def _get_latest_topics( - self, start: datetime | None, end: datetime | None - ) -> list[int]: - assert self.permissions is not None - topic_ids = [] - - valid_categories = set(self.category_id_map.keys()) - - latest_endpoint = urllib.parse.urljoin(self.base_url, 
"latest.json") - response = discourse_request(endpoint=latest_endpoint, perms=self.permissions) - topics = response.json()["topic_list"]["topics"] - for topic in topics: - last_time = topic.get("last_posted_at") - if not last_time: - continue - last_time_dt = time_str_to_utc(last_time) - - if start and start > last_time_dt: - continue - if end and end < last_time_dt: - continue - - if ( - self.categories - and valid_categories - and topic.get("category_id") not in valid_categories - ): - continue - - topic_ids.append(topic["id"]) - - return topic_ids + self.active_categories = set(self.category_id_map) def _get_doc_from_topic(self, topic_id: int) -> Document: assert self.permissions is not None topic_endpoint = urllib.parse.urljoin(self.base_url, f"t/{topic_id}.json") - response = discourse_request( - endpoint=topic_endpoint, - perms=self.permissions, - ) + response = self._make_request(endpoint=topic_endpoint) topic = response.json() topic_url = urllib.parse.urljoin(self.base_url, f"t/{topic['slug']}") @@ -167,26 +139,78 @@ def _get_doc_from_topic(self, topic_id: int) -> Document: ) return doc + def _get_latest_topics( + self, start: datetime | None, end: datetime | None, page: int + ) -> list[int]: + assert self.permissions is not None + topic_ids = [] + + if not self.categories: + latest_endpoint = urllib.parse.urljoin( + self.base_url, f"latest.json?page={page}" + ) + response = self._make_request(endpoint=latest_endpoint) + topics = response.json()["topic_list"]["topics"] + + else: + topics = [] + empty_categories = [] + + for category_id in self.category_id_map.keys(): + category_endpoint = urllib.parse.urljoin( + self.base_url, f"c/{category_id}.json?page={page}&sys=latest" + ) + response = self._make_request(endpoint=category_endpoint) + new_topics = response.json()["topic_list"]["topics"] + + if len(new_topics) == 0: + empty_categories.append(category_id) + topics.extend(new_topics) + + for empty_category in empty_categories: + self.category_id_map.pop(empty_category) + + for topic in topics: + last_time = topic.get("last_posted_at") + if not last_time: + continue + + last_time_dt = time_str_to_utc(last_time) + if (start and start > last_time_dt) or (end and end < last_time_dt): + continue + + topic_ids.append(topic["id"]) + if len(topic_ids) >= self.batch_size: + break + + return topic_ids + def _yield_discourse_documents( - self, topic_ids: list[int] + self, + start: datetime, + end: datetime, ) -> GenerateDocumentsOutput: - doc_batch: list[Document] = [] - for topic_id in topic_ids: - doc_batch.append(self._get_doc_from_topic(topic_id)) - - if len(doc_batch) >= self.batch_size: + page = 1 + while topic_ids := self._get_latest_topics(start, end, page): + doc_batch: list[Document] = [] + for topic_id in topic_ids: + doc_batch.append(self._get_doc_from_topic(topic_id)) + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + if doc_batch: yield doc_batch - doc_batch = [] - - if doc_batch: - yield doc_batch + page += 1 - def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + def load_credentials( + self, + credentials: dict[str, Any], + ) -> dict[str, Any] | None: self.permissions = DiscoursePerms( api_key=credentials["discourse_api_key"], api_username=credentials["discourse_api_username"], ) - return None def poll_source( @@ -194,16 +218,13 @@ def poll_source( ) -> GenerateDocumentsOutput: if self.permissions is None: raise ConnectorMissingCredentialError("Discourse") + start_datetime = 
datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc) end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc) self._get_categories_map() - latest_topic_ids = self._get_latest_topics( - start=start_datetime, end=end_datetime - ) - - yield from self._yield_discourse_documents(latest_topic_ids) + yield from self._yield_discourse_documents(start_datetime, end_datetime) if __name__ == "__main__": @@ -219,7 +240,5 @@ def poll_source( current = time.time() one_year_ago = current - 24 * 60 * 60 * 360 - latest_docs = connector.poll_source(one_year_ago, current) - print(next(latest_docs)) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 77d01394d4f..6c5501734b0 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -85,6 +85,11 @@ def _process_file( all_metadata = {**metadata, **file_metadata} if metadata else file_metadata + # add a prefix to avoid conflicts with other connectors + doc_id = f"FILE_CONNECTOR__{file_name}" + if metadata: + doc_id = metadata.get("document_id") or doc_id + # If this is set, we will show this in the UI as the "name" of the file file_display_name = all_metadata.get("file_display_name") or os.path.basename( file_name @@ -106,6 +111,7 @@ def _process_file( for k, v in all_metadata.items() if k not in [ + "document_id", "time_updated", "doc_updated_at", "link", @@ -132,7 +138,7 @@ def _process_file( return [ Document( - id=f"FILE_CONNECTOR__{file_name}", # add a prefix to avoid conflicts with other connectors + id=doc_id, sections=[ Section(link=all_metadata.get("link"), text=file_content_raw.strip()) ], diff --git a/backend/danswer/connectors/github/connector.py b/backend/danswer/connectors/github/connector.py index 89e5de551f6..aa72a3bef6e 100644 --- a/backend/danswer/connectors/github/connector.py +++ b/backend/danswer/connectors/github/connector.py @@ -38,7 +38,7 @@ def _sleep_after_rate_limit_exception(github_client: Github) -> None: tzinfo=timezone.utc ) - datetime.now(tz=timezone.utc) sleep_time += timedelta(minutes=1) # add an extra minute just to be safe - logger.info(f"Ran into Github rate-limit. Sleeping {sleep_time.seconds} seconds.") + logger.notice(f"Ran into Github rate-limit. 
Sleeping {sleep_time.seconds} seconds.") time.sleep(sleep_time.seconds) diff --git a/backend/danswer/connectors/gmail/connector_auth.py b/backend/danswer/connectors/gmail/connector_auth.py index ca08f719861..ad80d1e1eb1 100644 --- a/backend/danswer/connectors/gmail/connector_auth.py +++ b/backend/danswer/connectors/gmail/connector_auth.py @@ -11,16 +11,17 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import WEB_DOMAIN -from danswer.connectors.gmail.constants import CRED_KEY +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_CRED_KEY +from danswer.configs.constants import KV_GMAIL_CRED_KEY +from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY from danswer.connectors.gmail.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.gmail.constants import GMAIL_CRED_KEY from danswer.connectors.gmail.constants import ( GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) -from danswer.connectors.gmail.constants import GMAIL_SERVICE_ACCOUNT_KEY from danswer.connectors.gmail.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User @@ -49,7 +50,7 @@ def get_gmail_creds_for_authorized_user( try: creds.refresh(Request()) if creds.valid: - logger.info("Refreshed Gmail tokens.") + logger.notice("Refreshed Gmail tokens.") return creds except Exception as e: logger.exception(f"Failed to refresh gmail access token due to: {e}") @@ -71,7 +72,7 @@ def get_gmail_creds_for_service_account( def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(CRED_KEY.format(str(credential_id))) + csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) if csrf != state: raise PermissionError( "State from Gmail Connector callback does not match expected" @@ -79,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None: def get_gmail_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -91,12 +92,14 @@ def get_gmail_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -108,7 +111,9 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) @@ -120,7 +125,7 @@ def update_gmail_credential_access_tokens( ) -> 
OAuthCredentials | None: app_credentials = get_google_app_gmail_cred() flow = InstalledAppFlow.from_client_config( - app_credentials.dict(), + app_credentials.model_dump(), scopes=SCOPES, redirect_uri=_build_frontend_gmail_redirect(), ) @@ -146,28 +151,29 @@ def build_service_account_creds( credential_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user_email return CredentialBase( + source=DocumentSource.GMAIL, credential_json=credential_dict, admin_public=True, ) def get_google_app_gmail_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) return GoogleAppCredentials(**json.loads(creds_str)) def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None: get_dynamic_config_store().store( - GMAIL_CRED_KEY, app_credentials.json(), encrypt=True + KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True ) def delete_google_app_gmail_cred() -> None: - get_dynamic_config_store().delete(GMAIL_CRED_KEY) + get_dynamic_config_store().delete(KV_GMAIL_CRED_KEY) def get_gmail_service_account_key() -> GoogleServiceAccountKey: - creds_str = str(get_dynamic_config_store().load(GMAIL_SERVICE_ACCOUNT_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY)) return GoogleServiceAccountKey(**json.loads(creds_str)) @@ -175,19 +181,19 @@ def upsert_gmail_service_account_key( service_account_key: GoogleServiceAccountKey, ) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def delete_gmail_service_account_key() -> None: - get_dynamic_config_store().delete(GMAIL_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) def delete_service_account_key() -> None: - get_dynamic_config_store().delete(GMAIL_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/gmail/constants.py b/backend/danswer/connectors/gmail/constants.py index 1660f54be70..36eff081818 100644 --- a/backend/danswer/connectors/gmail/constants.py +++ b/backend/danswer/connectors/gmail/constants.py @@ -1,7 +1,4 @@ DB_CREDENTIALS_DICT_TOKEN_KEY = "gmail_tokens" GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "gmail_delegated_user" -CRED_KEY = "credential_id_{}" -GMAIL_CRED_KEY = "gmail_app_credential" -GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] diff --git a/backend/danswer/connectors/gong/connector.py b/backend/danswer/connectors/gong/connector.py index 9ad24a5c914..56c93f57d50 100644 --- a/backend/danswer/connectors/gong/connector.py +++ b/backend/danswer/connectors/gong/connector.py @@ -81,10 +81,10 @@ def _get_transcript_batches( for workspace in workspace_list: if workspace: - logger.info(f"Updating workspace: {workspace}") + logger.info(f"Updating Gong workspace: {workspace}") workspace_id = workspace_map.get(workspace) if not workspace_id: - logger.error(f"Invalid workspace: {workspace}") + logger.error(f"Invalid Gong workspace: {workspace}") if 
not self.continue_on_fail: raise ValueError(f"Invalid workspace: {workspace}") continue diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 7fa4aae8b37..40a9b73432f 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -267,7 +267,7 @@ def get_all_files_batched( yield from batch_generator( items=found_files, batch_size=batch_size, - pre_batch_yield=lambda batch_files: logger.info( + pre_batch_yield=lambda batch_files: logger.debug( f"Parseable Documents in batch: {[file['name'] for file in batch_files]}" ), ) @@ -306,24 +306,29 @@ def get_all_files_batched( def extract_text(file: dict[str, str], service: discovery.Resource) -> str: mime_type = file["mimeType"] + if mime_type not in set(item.value for item in GDriveMimeType): # Unsupported file types can still have a title, finding this way is still useful return UNSUPPORTED_FILE_TYPE_CONTENT - if mime_type == GDriveMimeType.DOC.value: - return ( - service.files() - .export(fileId=file["id"], mimeType="text/plain") - .execute() - .decode("utf-8") - ) - elif mime_type == GDriveMimeType.SPREADSHEET.value: - return ( + if mime_type in [ + GDriveMimeType.DOC.value, + GDriveMimeType.PPT.value, + GDriveMimeType.SPREADSHEET.value, + ]: + export_mime_type = "text/plain" + if mime_type == GDriveMimeType.SPREADSHEET.value: + export_mime_type = "text/csv" + elif mime_type == GDriveMimeType.PPT.value: + export_mime_type = "text/plain" + + response = ( service.files() - .export(fileId=file["id"], mimeType="text/csv") + .export(fileId=file["id"], mimeType=export_mime_type) .execute() - .decode("utf-8") ) + return response.decode("utf-8") + elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() return docx_to_text(file=io.BytesIO(response)) @@ -333,12 +338,6 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: elif mime_type == GDriveMimeType.POWERPOINT.value: response = service.files().get_media(fileId=file["id"]).execute() return pptx_to_text(file=io.BytesIO(response)) - elif mime_type == GDriveMimeType.PPT.value: - response = service.files().export( - fileId=file["id"], - mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation" - ).execute() - return pptx_to_text(file=io.BytesIO(response)) return UNSUPPORTED_FILE_TYPE_CONTENT diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index c467516f695..0f47727e6ee 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -11,7 +11,10 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import WEB_DOMAIN -from danswer.connectors.google_drive.constants import CRED_KEY +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_CRED_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) @@ -19,8 +22,6 @@ DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.google_drive.constants import GOOGLE_DRIVE_CRED_KEY -from danswer.connectors.google_drive.constants import 
GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY from danswer.connectors.google_drive.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User @@ -49,7 +50,7 @@ def get_google_drive_creds_for_authorized_user( try: creds.refresh(Request()) if creds.valid: - logger.info("Refreshed Google Drive tokens.") + logger.notice("Refreshed Google Drive tokens.") return creds except Exception as e: logger.exception(f"Failed to refresh google drive access token due to: {e}") @@ -71,7 +72,7 @@ def get_google_drive_creds_for_service_account( def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(CRED_KEY.format(str(credential_id))) + csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) if csrf != state: raise PermissionError( "State from Google Drive Connector callback does not match expected" @@ -79,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None: def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -91,7 +92,9 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) @@ -103,7 +106,7 @@ def update_credential_access_tokens( ) -> OAuthCredentials | None: app_credentials = get_google_app_cred() flow = InstalledAppFlow.from_client_config( - app_credentials.dict(), + app_credentials.model_dump(), scopes=SCOPES, redirect_uri=_build_frontend_google_drive_redirect(), ) @@ -118,6 +121,7 @@ def update_credential_access_tokens( def build_service_account_creds( + source: DocumentSource, delegated_user_email: str | None = None, ) -> CredentialBase: service_account_key = get_service_account_key() @@ -131,34 +135,37 @@ def build_service_account_creds( return CredentialBase( credential_json=credential_dict, admin_public=True, + source=DocumentSource.GOOGLE_DRIVE, ) def get_google_app_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) return GoogleAppCredentials(**json.loads(creds_str)) def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None: get_dynamic_config_store().store( - GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True + KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True ) def delete_google_app_cred() -> None: - get_dynamic_config_store().delete(GOOGLE_DRIVE_CRED_KEY) + get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_CRED_KEY) def get_service_account_key() -> GoogleServiceAccountKey: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)) + creds_str = str( + get_dynamic_config_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) + ) return GoogleServiceAccountKey(**json.loads(creds_str)) def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + 
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def delete_service_account_key() -> None: - get_dynamic_config_store().delete(GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/google_drive/constants.py b/backend/danswer/connectors/google_drive/constants.py index 47dc402a3c2..214bfd5cb97 100644 --- a/backend/danswer/connectors/google_drive/constants.py +++ b/backend/danswer/connectors/google_drive/constants.py @@ -1,9 +1,6 @@ DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens" DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user" -CRED_KEY = "credential_id_{}" -GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential" -GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" SCOPES = [ "https://www.googleapis.com/auth/drive.readonly", "https://www.googleapis.com/auth/drive.metadata.readonly", diff --git a/backend/danswer/connectors/guru/connector.py b/backend/danswer/connectors/guru/connector.py index 3c3c873b1bc..a27546425d3 100644 --- a/backend/danswer/connectors/guru/connector.py +++ b/backend/danswer/connectors/guru/connector.py @@ -103,6 +103,10 @@ def _process_cards( # In UI it's called Folders metadata_dict["folders"] = boards + collection = card.get("collection", {}) + if collection: + metadata_dict["collection_name"] = collection.get("name", "") + owner = card.get("owner", {}) author = None if owner: diff --git a/backend/danswer/connectors/mediawiki/wiki.py b/backend/danswer/connectors/mediawiki/wiki.py index 2283d8130cb..f4ec1e02311 100644 --- a/backend/danswer/connectors/mediawiki/wiki.py +++ b/backend/danswer/connectors/mediawiki/wiki.py @@ -86,7 +86,6 @@ class MediaWikiConnector(LoadConnector, PollConnector): categories: The categories to include in the index. pages: The pages to include in the index. recurse_depth: The depth to recurse into categories. -1 means unbounded recursion. - connector_name: The name of the connector. language_code: The language code of the wiki. batch_size: The batch size for loading documents. 
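Note on the credential-key changes in the Gmail and Google Drive auth modules earlier in this diff: the per-connector key constants are consolidated into backend/danswer/configs/constants.py under a KV_ prefix, and secrets are written to the dynamic config (key-value) store with encrypt=True. A minimal sketch of the round trip using the new constants; the import path for get_dynamic_config_store is assumed here, since it is not shown in these hunks.

import json

from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY
from danswer.dynamic_configs.factory import get_dynamic_config_store  # assumed import path


def save_drive_app_cred(app_cred: dict) -> None:
    # Store the app credential JSON encrypted under the shared KV_ constant
    get_dynamic_config_store().store(
        KV_GOOGLE_DRIVE_CRED_KEY, json.dumps(app_cred), encrypt=True
    )


def load_drive_app_cred() -> dict:
    # Load and decode the same key; mirrors the get/upsert helpers above
    return json.loads(str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)))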
@@ -104,7 +103,6 @@ def __init__( categories: list[str], pages: list[str], recurse_depth: int, - connector_name: str, language_code: str = "en", batch_size: int = INDEX_BATCH_SIZE, ) -> None: @@ -118,10 +116,8 @@ def __init__( self.batch_size = batch_size # short names can only have ascii letters and digits - self.connector_name = connector_name - connector_name = "".join(ch for ch in connector_name if ch.isalnum()) - self.family = family_class_dispatch(hostname, connector_name)() + self.family = family_class_dispatch(hostname, "Wikipedia Connector")() self.site = pywikibot.Site(fam=self.family, code=language_code) self.categories = [ pywikibot.Category(self.site, f"Category:{category.replace(' ', '_')}") @@ -210,7 +206,6 @@ def poll_source( if __name__ == "__main__": HOSTNAME = "fallout.fandom.com" test_connector = MediaWikiConnector( - connector_name="Fallout", hostname=HOSTNAME, categories=["Fallout:_New_Vegas_factions"], pages=["Fallout: New Vegas"], diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index 37ed2e22bd5..192aa1b206a 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -6,6 +6,7 @@ from danswer.configs.constants import DocumentSource from danswer.configs.constants import INDEX_SEPARATOR +from danswer.configs.constants import RETURN_SEPARATOR from danswer.utils.text_processing import make_url_compatible @@ -113,11 +114,18 @@ class DocumentBase(BaseModel): title: str | None = None from_ingestion_api: bool = False - def get_title_for_document_index(self) -> str | None: + def get_title_for_document_index( + self, + ) -> str | None: # If title is explicitly empty, return a None here for embedding purposes if self.title == "": return None - return self.semantic_identifier if self.title is None else self.title + replace_chars = set(RETURN_SEPARATOR) + title = self.semantic_identifier if self.title is None else self.title + for char in replace_chars: + title = title.replace(char, " ") + title = title.strip() + return title def get_metadata_str_attributes(self) -> list[str] | None: if not self.metadata: @@ -158,6 +166,36 @@ def from_base(cls, base: DocumentBase) -> "Document": ) +class DocumentErrorSummary(BaseModel): + id: str + semantic_id: str + section_link: str | None + + @classmethod + def from_document(cls, doc: Document) -> "DocumentErrorSummary": + section_link = doc.sections[0].link if len(doc.sections) > 0 else None + return cls( + id=doc.id, semantic_id=doc.semantic_identifier, section_link=section_link + ) + + @classmethod + def from_dict(cls, data: dict) -> "DocumentErrorSummary": + return cls( + id=str(data.get("id")), + semantic_id=str(data.get("semantic_id")), + section_link=str(data.get("section_link")), + ) + + def to_dict(self) -> dict[str, str | None]: + return { + "id": self.id, + "semantic_id": self.semantic_id, + "section_link": self.section_link, + } + + class IndexAttemptMetadata(BaseModel): + batch_num: int | None = None + num_exceptions: int = 0 connector_id: int credential_id: int diff --git a/backend/danswer/connectors/slack/utils.py b/backend/danswer/connectors/slack/utils.py index ab300b48986..8650ce9ddc9 100644 --- a/backend/danswer/connectors/slack/utils.py +++ b/backend/danswer/connectors/slack/utils.py @@ -68,12 +68,13 @@ def paginated_call(**kwargs: Any) -> Generator[dict[str, Any], None, None]: def make_slack_api_rate_limited( - call: Callable[..., SlackResponse], max_retries: int = 3 + call: Callable[..., SlackResponse], max_retries: int = 7 ) -> Callable[..., 
SlackResponse]: """Wraps calls to slack API so that they automatically handle rate limiting""" @wraps(call) def rate_limited_call(**kwargs: Any) -> SlackResponse: + last_exception = None for _ in range(max_retries): try: # Make the API call @@ -85,14 +86,20 @@ def rate_limited_call(**kwargs: Any) -> SlackResponse: return response except SlackApiError as e: - if e.response["error"] == "ratelimited": + last_exception = e + try: + error = e.response["error"] + except KeyError: + error = "unknown error" + + if error == "ratelimited": # Handle rate limiting: get the 'Retry-After' header value and sleep for that duration retry_after = int(e.response.headers.get("Retry-After", 1)) logger.info( f"Slack call rate limited, retrying after {retry_after} seconds. Exception: {e}" ) time.sleep(retry_after) - elif e.response["error"] in ["already_reacted", "no_reaction"]: + elif error in ["already_reacted", "no_reaction"]: # The response isn't used for reactions, this is basically just a pass return e.response else: @@ -100,7 +107,11 @@ def rate_limited_call(**kwargs: Any) -> SlackResponse: raise # If the code reaches this point, all retries have been exhausted - raise Exception(f"Max retries ({max_retries}) exceeded") + msg = f"Max retries ({max_retries}) exceeded" + if last_exception: + raise Exception(msg) from last_exception + else: + raise Exception(msg) return rate_limited_call diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py index 4e83284cfdc..6e76e404acd 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -15,6 +15,7 @@ from playwright.sync_api import Playwright from playwright.sync_api import sync_playwright from requests_oauthlib import OAuth2Session # type:ignore +from urllib3.exceptions import MaxRetryError from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID @@ -29,6 +30,7 @@ from danswer.file_processing.extract_file_text import pdf_to_text from danswer.file_processing.html_utils import web_html_cleanup from danswer.utils.logger import setup_logger +from danswer.utils.sitemap import list_pages_for_site logger = setup_logger() @@ -82,8 +84,28 @@ def check_internet_connection(url: str) -> None: try: response = requests.get(url, timeout=3) response.raise_for_status() - except (requests.RequestException, ValueError): - raise Exception(f"Unable to reach {url} - check your internet connection") + except requests.exceptions.HTTPError as e: + status_code = e.response.status_code + error_msg = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable", + 504: "Gateway Timeout", + }.get(status_code, "HTTP Error") + raise Exception(f"{error_msg} ({status_code}) for {url} - {e}") + except requests.exceptions.SSLError as e: + cause = ( + e.args[0].reason + if isinstance(e.args, tuple) and isinstance(e.args[0], MaxRetryError) + else e.args + ) + raise Exception(f"SSL error {str(cause)}") + except (requests.RequestException, ValueError) as e: + raise Exception(f"Unable to reach {url} - check your internet connection: {e}") def is_valid_url(url: str) -> bool: @@ -145,16 +167,21 @@ def extract_urls_from_sitemap(sitemap_url: str) -> list[str]: response.raise_for_status() soup = BeautifulSoup(response.content, "html.parser") - result = [ + urls = [ _ensure_absolute_url(sitemap_url, loc_tag.text) for loc_tag in 
soup.find_all("loc") ] - if not result: + + if len(urls) == 0 and len(soup.find_all("urlset")) == 0: + # the given url doesn't look like a sitemap, let's try to find one + urls = list_pages_for_site(sitemap_url) + + if len(urls) == 0: raise ValueError( f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead." ) - return result + return urls def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str: @@ -264,7 +291,7 @@ def load_from_state(self) -> GenerateDocumentsOutput: id=current_url, sections=[Section(link=current_url, text=page_text)], source=DocumentSource.WEB, - semantic_identifier=current_url.split(".")[-1], + semantic_identifier=current_url.split("/")[-1], metadata={}, ) ) diff --git a/backend/danswer/connectors/wikipedia/connector.py b/backend/danswer/connectors/wikipedia/connector.py index 2788c22c1aa..109e647f113 100644 --- a/backend/danswer/connectors/wikipedia/connector.py +++ b/backend/danswer/connectors/wikipedia/connector.py @@ -15,7 +15,6 @@ def __init__( categories: list[str], pages: list[str], recurse_depth: int, - connector_name: str, language_code: str = "en", batch_size: int = INDEX_BATCH_SIZE, ) -> None: @@ -24,7 +23,6 @@ def __init__( categories=categories, pages=pages, recurse_depth=recurse_depth, - connector_name=connector_name, language_code=language_code, batch_size=batch_size, ) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index fc9b703c6ec..b6d4220b9ce 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -1,9 +1,12 @@ from typing import Any +import requests +from retry import retry from zenpy import Zenpy # type: ignore from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( time_str_to_utc, @@ -18,12 +21,24 @@ from danswer.file_processing.html_utils import parse_html_page_basic -def _article_to_document(article: Article) -> Document: +def _article_to_document(article: Article, content_tags: dict[str, str]) -> Document: author = BasicExpertInfo( display_name=article.author.name, email=article.author.email ) update_time = time_str_to_utc(article.updated_at) - labels = [str(label) for label in article.label_names] + + # build metadata + metadata: dict[str, str | list[str]] = { + "labels": [str(label) for label in article.label_names if label], + "content_tags": [ + content_tags[tag_id] + for tag_id in article.content_tag_ids + if tag_id in content_tags + ], + } + + # remove empty values + metadata = {k: v for k, v in metadata.items() if v} return Document( id=f"article:{article.id}", @@ -34,7 +49,7 @@ def _article_to_document(article: Article) -> Document: semantic_identifier=article.title, doc_updated_at=update_time, primary_owners=[author], - metadata={"labels": labels} if labels else {}, + metadata=metadata, ) @@ -47,6 +62,42 @@ class ZendeskConnector(LoadConnector, PollConnector): def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: self.batch_size = batch_size self.zendesk_client: Zenpy | None = None + self.content_tags: dict[str, str] = {} + + @retry(tries=3, delay=2, backoff=2) + def _set_content_tags( + self, subdomain: str, email: str, token: str, page_size: int = 30 + ) 
-> None: + # Construct the base URL + base_url = f"https://{subdomain}.zendesk.com/api/v2/guide/content_tags" + + # Set up authentication + auth = (f"{email}/token", token) + + # Set up pagination parameters + params = {"page[size]": page_size} + + try: + while True: + # Make the GET request + response = requests.get(base_url, auth=auth, params=params) + + # Check if the request was successful + if response.status_code == 200: + data = response.json() + content_tag_list = data.get("records", []) + for tag in content_tag_list: + self.content_tags[tag["id"]] = tag["name"] + + # Check if there are more pages + if data.get("meta", {}).get("has_more", False): + params["page[after]"] = data["meta"]["after_cursor"] + else: + break + else: + raise Exception(f"Error: {response.status_code}\n{response.text}") + except Exception as e: + raise Exception(f"Error fetching content tags: {str(e)}") def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: # Subdomain is actually the whole URL @@ -61,6 +112,11 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None email=credentials["zendesk_email"], token=credentials["zendesk_token"], ) + self._set_content_tags( + subdomain, + credentials["zendesk_email"], + credentials["zendesk_token"], + ) return None def load_from_state(self) -> GenerateDocumentsOutput: @@ -81,13 +137,40 @@ def poll_source( ) doc_batch = [] for article in articles: - if article.body is None or article.draft: + if ( + article.body is None + or article.draft + or any( + label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS + for label in article.label_names + ) + ): continue - doc_batch.append(_article_to_document(article)) + doc_batch.append(_article_to_document(article, self.content_tags)) if len(doc_batch) >= self.batch_size: yield doc_batch doc_batch.clear() if doc_batch: yield doc_batch + + +if __name__ == "__main__": + import os + import time + + connector = ZendeskConnector() + connector.load_credentials( + { + "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"], + "zendesk_email": os.environ["ZENDESK_EMAIL"], + "zendesk_token": os.environ["ZENDESK_TOKEN"], + } + ) + + current = time.time() + one_day_ago = current - 24 * 60 * 60 # 1 day + document_batches = connector.poll_source(one_day_ago, current) + + print(next(document_batches)) diff --git a/backend/danswer/connectors/zulip/schemas.py b/backend/danswer/connectors/zulip/schemas.py index 76a1cb5cc12..385272cb412 100644 --- a/backend/danswer/connectors/zulip/schemas.py +++ b/backend/danswer/connectors/zulip/schemas.py @@ -3,6 +3,7 @@ from typing import Optional from pydantic import BaseModel +from pydantic import Field class Message(BaseModel): @@ -18,11 +19,11 @@ class Message(BaseModel): sender_realm_str: str subject: str topic_links: Optional[List[Any]] = None - last_edit_timestamp: Optional[int] = None - edit_history: Any + last_edit_timestamp: Optional[int] + edit_history: Any = None reactions: List[Any] submessages: List[Any] - flags: List[str] = [] + flags: List[str] = Field(default_factory=list) display_recipient: Optional[str] = None type: Optional[str] = None stream_id: int @@ -39,4 +40,4 @@ class GetMessagesResponse(BaseModel): found_newest: Optional[bool] = None history_limited: Optional[bool] = None anchor: Optional[str] = None - messages: List[Message] = [] + messages: List[Message] = Field(default_factory=list) diff --git a/backend/danswer/danswerbot/slack/blocks.py b/backend/danswer/danswerbot/slack/blocks.py index aaf26318851..da4a867e233 100644 --- 
a/backend/danswer/danswerbot/slack/blocks.py +++ b/backend/danswer/danswerbot/slack/blocks.py @@ -70,6 +70,10 @@ def _process_citations_for_slack(text: str) -> str: def slack_link_format(match: Match) -> str: link_text = match.group(1) link_url = match.group(2) + + # Account for empty link citations + if link_url == "": + return f"[{link_text}]" return f"<{link_url}|[{link_text}]>" # Substitute all matches in the input text @@ -299,7 +303,9 @@ def build_sources_blocks( else [] ) + [ - MarkdownTextObject( + MarkdownTextObject(text=f"{document_title}") + if d.link == "" + else MarkdownTextObject( text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}" ), ] @@ -474,7 +480,7 @@ def build_follow_up_resolved_blocks( if tag_str: tag_str += " " - group_str = " ".join([f"" for group in group_ids]) + group_str = " ".join([f"" for group_id in group_ids]) if group_str: group_str += " " diff --git a/backend/danswer/danswerbot/slack/constants.py b/backend/danswer/danswerbot/slack/constants.py index 5eaf63c8984..cf2b38032c3 100644 --- a/backend/danswer/danswerbot/slack/constants.py +++ b/backend/danswer/danswerbot/slack/constants.py @@ -6,7 +6,6 @@ IMMEDIATE_RESOLVED_BUTTON_ACTION_ID = "immediate-resolved-button" FOLLOWUP_BUTTON_ACTION_ID = "followup-button" FOLLOWUP_BUTTON_RESOLVED_ACTION_ID = "followup-resolved-button" -SLACK_CHANNEL_ID = "channel_id" VIEW_DOC_FEEDBACK_ID = "view-doc-feedback" GENERATE_ANSWER_BUTTON_ACTION_ID = "generate-answer-button" diff --git a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py index 30c7015b963..732be8df9db 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py @@ -1,4 +1,3 @@ -import logging from typing import Any from typing import cast @@ -29,8 +28,8 @@ from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.utils import build_feedback_id from danswer.danswerbot.slack.utils import decompose_action_id -from danswer.danswerbot.slack.utils import fetch_groupids_from_names -from danswer.danswerbot.slack.utils import fetch_userids_from_emails +from danswer.danswerbot.slack.utils import fetch_group_ids_from_names +from danswer.danswerbot.slack.utils import fetch_user_ids_from_emails from danswer.danswerbot.slack.utils import get_channel_name_from_id from danswer.danswerbot.slack.utils import get_feedback_visibility from danswer.danswerbot.slack.utils import read_slack_thread @@ -43,7 +42,7 @@ from danswer.document_index.factory import get_default_document_index from danswer.utils.logger import setup_logger -logger_base = setup_logger() +logger = setup_logger() def handle_doc_feedback_button( @@ -51,7 +50,7 @@ def handle_doc_feedback_button( client: SocketModeClient, ) -> None: if not (actions := req.payload.get("actions")): - logger_base.error("Missing actions. Unable to build the source feedback view") + logger.error("Missing actions. 
Unable to build the source feedback view") return # Extracts the feedback_id coming from the 'source feedback' button @@ -134,7 +133,7 @@ def handle_generate_answer_button( receiver_ids=None, client=client.web_client, channel=channel_id, - logger=cast(logging.Logger, logger_base), + logger=logger, feedback_reminder_id=None, ) @@ -196,7 +195,7 @@ def handle_slack_feedback( feedback=feedback, ) else: - logger_base.error(f"Feedback type '{feedback_type}' not supported") + logger.error(f"Feedback type '{feedback_type}' not supported") if get_feedback_visibility() == FeedbackVisibility.PRIVATE or feedback_type not in [ LIKE_BLOCK_ACTION_ID, @@ -260,11 +259,11 @@ def handle_followup_button( tag_names = slack_bot_config.channel_config.get("follow_up_tags") remaining = None if tag_names: - tag_ids, remaining = fetch_userids_from_emails( + tag_ids, remaining = fetch_user_ids_from_emails( tag_names, client.web_client ) if remaining: - group_ids, _ = fetch_groupids_from_names(remaining, client.web_client) + group_ids, _ = fetch_group_ids_from_names(remaining, client.web_client) blocks = build_follow_up_resolved_blocks(tag_ids=tag_ids, group_ids=group_ids) @@ -339,7 +338,7 @@ def handle_followup_resolved_button( ) if not response.get("ok"): - logger_base.error("Unable to delete message for resolved") + logger.error("Unable to delete message for resolved") if immediate: msg_text = f"{clicker_name} has marked this question as resolved!" diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index a05006dec1e..2edbd973553 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -1,6 +1,4 @@ import datetime -import logging -from typing import cast from slack_sdk import WebClient from slack_sdk.errors import SlackApiError @@ -9,7 +7,6 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_FEEDBACK_REMINDER from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI from danswer.danswerbot.slack.blocks import get_feedback_reminder_blocks -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.handlers.handle_regular_answer import ( handle_regular_answer, ) @@ -17,15 +14,15 @@ handle_standard_answers, ) from danswer.danswerbot.slack.models import SlackMessageInfo -from danswer.danswerbot.slack.utils import ChannelIdAdapter -from danswer.danswerbot.slack.utils import fetch_userids_from_emails -from danswer.danswerbot.slack.utils import fetch_userids_from_groups +from danswer.danswerbot.slack.utils import fetch_user_ids_from_emails +from danswer.danswerbot.slack.utils import fetch_user_ids_from_groups from danswer.danswerbot.slack.utils import respond_in_thread from danswer.danswerbot.slack.utils import slack_usage_report from danswer.danswerbot.slack.utils import update_emote_react from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import SlackBotConfig from danswer.utils.logger import setup_logger +from shared_configs.configs import SLACK_CHANNEL_ID logger_base = setup_logger() @@ -53,12 +50,8 @@ def send_msg_ack_to_user(details: SlackMessageInfo, client: WebClient) -> None: def schedule_feedback_reminder( details: SlackMessageInfo, include_followup: bool, client: WebClient ) -> str | None: - logger = cast( - logging.Logger, - ChannelIdAdapter( - logger_base, extra={SLACK_CHANNEL_ID: details.channel_to_respond} - ), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: 
details.channel_to_respond}) + if not DANSWER_BOT_FEEDBACK_REMINDER: logger.info("Scheduled feedback reminder disabled...") return None @@ -97,10 +90,7 @@ def schedule_feedback_reminder( def remove_scheduled_feedback_reminder( client: WebClient, channel: str | None, msg_id: str ) -> None: - logger = cast( - logging.Logger, - ChannelIdAdapter(logger_base, extra={SLACK_CHANNEL_ID: channel}), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) try: client.chat_deleteScheduledMessage( @@ -129,10 +119,7 @@ def handle_message( """ channel = message_info.channel_to_respond - logger = cast( - logging.Logger, - ChannelIdAdapter(logger_base, extra={SLACK_CHANNEL_ID: channel}), - ) + logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) messages = message_info.thread_messages sender_id = message_info.sender @@ -158,11 +145,8 @@ def handle_message( ] prompt = persona.prompts[0] if persona.prompts else None - # List of user id to send message to, if None, send to everyone in channel - send_to: list[str] | None = None respond_tag_only = False - respond_team_member_list = None - respond_slack_group_list = None + respond_member_group_list = None channel_conf = None if slack_bot_config and slack_bot_config.channel_config: @@ -184,8 +168,7 @@ def handle_message( ) respond_tag_only = channel_conf.get("respond_tag_only") or False - respond_team_member_list = channel_conf.get("respond_team_member_list") or None - respond_slack_group_list = channel_conf.get("respond_slack_group_list") or None + respond_member_group_list = channel_conf.get("respond_member_group_list", None) if respond_tag_only and not bypass_filters: logger.info( @@ -194,17 +177,23 @@ def handle_message( ) return False - if respond_team_member_list: - send_to, _ = fetch_userids_from_emails(respond_team_member_list, client) - if respond_slack_group_list: - user_ids, _ = fetch_userids_from_groups(respond_slack_group_list, client) - send_to = (send_to + user_ids) if send_to else user_ids - if send_to: - send_to = list(set(send_to)) # remove duplicates + # List of user id to send message to, if None, send to everyone in channel + send_to: list[str] | None = None + missing_users: list[str] | None = None + if respond_member_group_list: + send_to, missing_ids = fetch_user_ids_from_emails( + respond_member_group_list, client + ) + + user_ids, missing_users = fetch_user_ids_from_groups(missing_ids, client) + send_to = list(set(send_to + user_ids)) if send_to else user_ids + + if missing_users: + logger.warning(f"Failed to find these users/groups: {missing_users}") # If configured to respond to team members only, then cannot be used with a /DanswerBot command # which would just respond to the sender - if (respond_team_member_list or respond_slack_group_list) and is_bot_msg: + if send_to and is_bot_msg: if sender_id: respond_in_thread( client=client, diff --git a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py index 212c65b13df..e3a78917a76 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py @@ -1,5 +1,4 @@ import functools -import logging from collections.abc import Callable from typing import Any from typing import cast @@ -38,6 +37,7 @@ from danswer.db.models import SlackBotConfig from danswer.db.models import SlackBotResponseType from danswer.db.persona import fetch_persona_by_id +from danswer.db.search_settings import get_current_search_settings from 
danswer.llm.answering.prompts.citations_prompt import ( compute_max_document_tokens_for_persona, ) @@ -49,8 +49,9 @@ from danswer.one_shot_answer.models import OneShotQAResponse from danswer.search.enums import OptionalSearchSetting from danswer.search.models import BaseFilters +from danswer.search.models import RerankingDetails from danswer.search.models import RetrievalDetails -from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW +from danswer.utils.logger import DanswerLoggingAdapter srl = SlackRateLimiter() @@ -83,7 +84,7 @@ def handle_regular_answer( receiver_ids: list[str] | None, client: WebClient, channel: str, - logger: logging.Logger, + logger: DanswerLoggingAdapter, feedback_reminder_id: str | None, num_retries: int = DANSWER_BOT_NUM_RETRIES, answer_generation_timeout: int = DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, @@ -136,7 +137,6 @@ def handle_regular_answer( tries=num_retries, delay=0.25, backoff=2, - logger=logger, ) @rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to) def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | None: @@ -147,7 +147,12 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non if len(new_message_request.messages) > 1: persona = cast( Persona, - fetch_persona_by_id(db_session, new_message_request.persona_id), + fetch_persona_by_id( + db_session, + new_message_request.persona_id, + user=None, + get_editable=False, + ), ) llm, _ = get_llms_for_persona(persona) @@ -223,15 +228,24 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non enable_auto_detect_filters=auto_detect_filters, ) + # Always apply reranking settings if it exists, this is the non-streaming flow + with Session(get_sqlalchemy_engine()) as db_session: + saved_search_settings = get_current_search_settings(db_session) + # This includes throwing out answer via reflexion answer = _get_answer( DirectQARequest( messages=messages, + multilingual_query_expansion=saved_search_settings.multilingual_expansion + if saved_search_settings + else None, prompt_id=prompt.id if prompt else None, persona_id=persona.id if persona is not None else 0, retrieval_options=retrieval_details, chain_of_thought=not disable_cot, - skip_rerank=not ENABLE_RERANKING_ASYNC_FLOW, + rerank_settings=RerankingDetails.from_db_model(saved_search_settings) + if saved_search_settings + else None, ) ) except Exception as e: @@ -311,7 +325,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non ) if answer.answer_valid is False: - logger.info( + logger.notice( "Answer was evaluated to be invalid, throwing it away without responding." 
) update_emote_react( @@ -349,7 +363,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non return True if not answer.answer and disable_docs_only_answer: - logger.info( + logger.notice( "Unable to find answer - not responding since the " "`DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER` env variable is set" ) diff --git a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py index b853b298b70..8e1663c1a4c 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py @@ -1,5 +1,3 @@ -import logging - from slack_sdk import WebClient from sqlalchemy.orm import Session @@ -18,7 +16,43 @@ from danswer.db.chat import get_or_create_root_message from danswer.db.models import Prompt from danswer.db.models import SlackBotConfig +from danswer.db.standard_answer import fetch_standard_answer_categories_by_names from danswer.db.standard_answer import find_matching_standard_answers +from danswer.server.manage.models import StandardAnswer +from danswer.utils.logger import DanswerLoggingAdapter +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def oneoff_standard_answers( + message: str, + slack_bot_categories: list[str], + db_session: Session, +) -> list[StandardAnswer]: + """ + Respond to the user message if it matches any configured standard answers. + + Returns a list of matching StandardAnswers if found, otherwise None. + """ + configured_standard_answers = { + standard_answer + for category in fetch_standard_answer_categories_by_names( + slack_bot_categories, db_session=db_session + ) + for standard_answer in category.standard_answers + } + + matching_standard_answers = find_matching_standard_answers( + query=message, + id_in=[answer.id for answer in configured_standard_answers], + db_session=db_session, + ) + + server_standard_answers = [ + StandardAnswer.from_model(db_answer) for db_answer in matching_standard_answers + ] + return server_standard_answers def handle_standard_answers( @@ -26,7 +60,7 @@ def handle_standard_answers( receiver_ids: list[str] | None, slack_bot_config: SlackBotConfig | None, prompt: Prompt | None, - logger: logging.Logger, + logger: DanswerLoggingAdapter, client: WebClient, db_session: Session, ) -> bool: diff --git a/backend/danswer/danswerbot/slack/listener.py b/backend/danswer/danswerbot/slack/listener.py index 4f6df76545b..c59f4caf1aa 100644 --- a/backend/danswer/danswerbot/slack/listener.py +++ b/backend/danswer/danswerbot/slack/listener.py @@ -21,7 +21,6 @@ from danswer.danswerbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.constants import VIEW_DOC_FEEDBACK_ID from danswer.danswerbot.slack.handlers.handle_buttons import handle_doc_feedback_button from danswer.danswerbot.slack.handlers.handle_buttons import handle_followup_button @@ -39,7 +38,7 @@ from danswer.danswerbot.slack.handlers.handle_message import schedule_feedback_reminder from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.tokens import fetch_tokens -from danswer.danswerbot.slack.utils import ChannelIdAdapter +from danswer.danswerbot.slack.utils import check_message_limit from 
danswer.danswerbot.slack.utils import decompose_action_id from danswer.danswerbot.slack.utils import get_channel_name_from_id from danswer.danswerbot.slack.utils import get_danswer_bot_app_id @@ -47,16 +46,18 @@ from danswer.danswerbot.slack.utils import remove_danswer_bot_tag from danswer.danswerbot.slack.utils import rephrase_slack_message from danswer.danswerbot.slack.utils import respond_in_thread -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.search_settings import get_current_search_settings from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder from danswer.one_shot_answer.models import ThreadMessage from danswer.search.retrieval.search_runner import download_nltk_data -from danswer.search.search_nlp_models import warm_up_encoders from danswer.server.manage.models import SlackBotTokens from danswer.utils.logger import setup_logger from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.configs import SLACK_CHANNEL_ID logger = setup_logger() @@ -84,18 +85,18 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool event = cast(dict[str, Any], req.payload.get("event", {})) msg = cast(str | None, event.get("text")) channel = cast(str | None, event.get("channel")) - channel_specific_logger = ChannelIdAdapter( - logger, extra={SLACK_CHANNEL_ID: channel} - ) + channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) # This should never happen, but we can't continue without a channel since # we can't send a response without it if not channel: - channel_specific_logger.error("Found message without channel - skipping") + channel_specific_logger.warning("Found message without channel - skipping") return False if not msg: - channel_specific_logger.error("Cannot respond to empty message - skipping") + channel_specific_logger.warning( + "Cannot respond to empty message - skipping" + ) return False if ( @@ -130,9 +131,19 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool if event_type == "message": bot_tag_id = get_danswer_bot_app_id(client.web_client) + + is_dm = event.get("channel_type") == "im" + is_tagged = bot_tag_id and bot_tag_id in msg + is_danswer_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "") + + # DanswerBot should never respond to itself + if is_danswer_bot_msg: + logger.info("Ignoring message from DanswerBot") + return False + # DMs with the bot don't pick up the @DanswerBot so we have to keep the # caught events_api - if bot_tag_id and bot_tag_id in msg and event.get("channel_type") != "im": + if is_tagged and not is_dm: # Let the tag flow handle this case, don't reply twice return False @@ -185,9 +196,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool if req.type == "slash_commands": # Verify that there's an associated channel channel = req.payload.get("channel_id") - channel_specific_logger = ChannelIdAdapter( - logger, extra={SLACK_CHANNEL_ID: channel} - ) + channel_specific_logger = setup_logger(extra={SLACK_CHANNEL_ID: channel}) + if not channel: channel_specific_logger.error( "Received DanswerBot command without channel - skipping" @@ -201,6 +211,9 @@ def prefilter_requests(req: SocketModeRequest, client: 
SocketModeClient) -> bool ) return False + if not check_message_limit(): + return False + logger.debug(f"Handling Slack request with Payload: '{req.payload}'") return True @@ -230,7 +243,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: ) query_event_id, _, _ = decompose_action_id(feedback_id) - logger.info(f"Successfully handled QA feedback for event: {query_event_id}") + logger.notice(f"Successfully handled QA feedback for event: {query_event_id}") def build_request_details( @@ -247,15 +260,17 @@ def build_request_details( msg = remove_danswer_bot_tag(msg, client=client.web_client) if DANSWER_BOT_REPHRASE_MESSAGE: - logger.info(f"Rephrasing Slack message. Original message: {msg}") + logger.notice(f"Rephrasing Slack message. Original message: {msg}") try: msg = rephrase_slack_message(msg) - logger.info(f"Rephrased message: {msg}") + logger.notice(f"Rephrased message: {msg}") except Exception as e: logger.error(f"Error while trying to rephrase the Slack message: {e}") + else: + logger.notice(f"Received Slack message: {msg}") if tagged: - logger.info("User tagged DanswerBot") + logger.debug("User tagged DanswerBot") if thread_ts != message_ts and thread_ts is not None: thread_messages = read_slack_thread( @@ -437,7 +452,7 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: socket_client.socket_mode_request_listeners.append(process_slack_event) # type: ignore # Establish a WebSocket connection to the Socket Mode servers - logger.info("Listening for messages from Slack...") + logger.notice("Listening for messages from Slack...") socket_client.connect() @@ -454,7 +469,7 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: slack_bot_tokens: SlackBotTokens | None = None socket_client: SocketModeClient | None = None - logger.info("Verifying query preprocessing (NLTK) data is downloaded") + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") download_nltk_data() while True: @@ -463,18 +478,20 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: if latest_slack_bot_tokens != slack_bot_tokens: if slack_bot_tokens is not None: - logger.info("Slack Bot tokens have changed - reconnecting") + logger.notice("Slack Bot tokens have changed - reconnecting") else: # This happens on the very first time the listener process comes up # or the tokens have updated (set up for the first time) with Session(get_sqlalchemy_engine()) as db_session: - embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) + embedding_model = EmbeddingModel.from_db_model( + search_settings=search_settings, + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ) - warm_up_encoders( - model_name=embedding_model.model_name, - normalize=embedding_model.normalize, - model_server_host=MODEL_SERVER_HOST, - model_server_port=MODEL_SERVER_PORT, + warm_up_bi_encoder( + embedding_model=embedding_model, ) slack_bot_tokens = latest_slack_bot_tokens diff --git a/backend/danswer/danswerbot/slack/tokens.py b/backend/danswer/danswerbot/slack/tokens.py index 16014574a31..5de3a6a0135 100644 --- a/backend/danswer/danswerbot/slack/tokens.py +++ b/backend/danswer/danswerbot/slack/tokens.py @@ -1,13 +1,11 @@ import os from typing import cast +from danswer.configs.constants import KV_SLACK_BOT_TOKENS_CONFIG_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.manage.models import SlackBotTokens 
-_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key" - - def fetch_tokens() -> SlackBotTokens: # first check env variables app_token = os.environ.get("DANSWER_BOT_SLACK_APP_TOKEN") @@ -17,7 +15,7 @@ def fetch_tokens() -> SlackBotTokens: dynamic_config_store = get_dynamic_config_store() return SlackBotTokens( - **cast(dict, dynamic_config_store.load(key=_SLACK_BOT_TOKENS_CONFIG_KEY)) + **cast(dict, dynamic_config_store.load(key=KV_SLACK_BOT_TOKENS_CONFIG_KEY)) ) @@ -26,5 +24,5 @@ def save_tokens( ) -> None: dynamic_config_store = get_dynamic_config_store() dynamic_config_store.store( - key=_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True + key=KV_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True ) diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index 1e5ffcc52f1..d762dde7826 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -3,7 +3,6 @@ import re import string import time -from collections.abc import MutableMapping from typing import Any from typing import cast from typing import Optional @@ -22,10 +21,15 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_QPM from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_WAIT_TIME from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES +from danswer.configs.danswerbot_configs import ( + DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD, +) +from danswer.configs.danswerbot_configs import ( + DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS, +) from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import SlackTextCleaner from danswer.danswerbot.slack.constants import FeedbackVisibility -from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.tokens import fetch_tokens from danswer.db.engine import get_sqlalchemy_engine from danswer.db.users import get_user_by_email @@ -43,7 +47,41 @@ logger = setup_logger() -DANSWER_BOT_APP_ID: str | None = None +_DANSWER_BOT_APP_ID: str | None = None +_DANSWER_BOT_MESSAGE_COUNT: int = 0 +_DANSWER_BOT_COUNT_START_TIME: float = time.time() + + +def get_danswer_bot_app_id(web_client: WebClient) -> Any: + global _DANSWER_BOT_APP_ID + if _DANSWER_BOT_APP_ID is None: + _DANSWER_BOT_APP_ID = web_client.auth_test().get("user_id") + return _DANSWER_BOT_APP_ID + + +def check_message_limit() -> bool: + """ + This isnt a perfect solution. + High traffic at the end of one period and start of another could cause + the limit to be exceeded. + """ + if DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD == 0: + return True + global _DANSWER_BOT_MESSAGE_COUNT + global _DANSWER_BOT_COUNT_START_TIME + time_since_start = time.time() - _DANSWER_BOT_COUNT_START_TIME + if time_since_start > DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS: + _DANSWER_BOT_MESSAGE_COUNT = 0 + _DANSWER_BOT_COUNT_START_TIME = time.time() + if (_DANSWER_BOT_MESSAGE_COUNT + 1) > DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD: + logger.error( + f"DanswerBot has reached the message limit {DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD}" + f" for the time period {DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS} seconds." 
+ " These limits are configurable in backend/danswer/configs/danswerbot_configs.py" + ) + return False + _DANSWER_BOT_MESSAGE_COUNT += 1 + return True def rephrase_slack_message(msg: str) -> str: @@ -98,32 +136,11 @@ def update_emote_react( logger.error(f"Was not able to react to user message due to: {e}") -def get_danswer_bot_app_id(web_client: WebClient) -> Any: - global DANSWER_BOT_APP_ID - if DANSWER_BOT_APP_ID is None: - DANSWER_BOT_APP_ID = web_client.auth_test().get("user_id") - return DANSWER_BOT_APP_ID - - def remove_danswer_bot_tag(message_str: str, client: WebClient) -> str: bot_tag_id = get_danswer_bot_app_id(web_client=client) return re.sub(rf"<@{bot_tag_id}>\s", "", message_str) -class ChannelIdAdapter(logging.LoggerAdapter): - """This is used to add the channel ID to all log messages - emitted in this file""" - - def process( - self, msg: str, kwargs: MutableMapping[str, Any] - ) -> tuple[str, MutableMapping[str, Any]]: - channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None - if channel_id: - return f"[Channel ID: {channel_id}] {msg}", kwargs - else: - return msg, kwargs - - def get_web_client() -> WebClient: slack_tokens = fetch_tokens() return WebClient(token=slack_tokens.bot_token) @@ -302,7 +319,7 @@ def get_channel_name_from_id( raise e -def fetch_userids_from_emails( +def fetch_user_ids_from_emails( user_emails: list[str], client: WebClient ) -> tuple[list[str], list[str]]: user_ids: list[str] = [] @@ -318,57 +335,72 @@ def fetch_userids_from_emails( return user_ids, failed_to_find -def fetch_userids_from_groups( - group_names: list[str], client: WebClient +def fetch_user_ids_from_groups( + given_names: list[str], client: WebClient ) -> tuple[list[str], list[str]]: user_ids: list[str] = [] failed_to_find: list[str] = [] - for group_name in group_names: - try: - # First, find the group ID from the group name - response = client.usergroups_list() - groups = {group["name"]: group["id"] for group in response["usergroups"]} - group_id = groups.get(group_name) - - if group_id: - # Fetch user IDs for the group + try: + response = client.usergroups_list() + if not isinstance(response.data, dict): + logger.error("Error fetching user groups") + return user_ids, given_names + + all_group_data = response.data.get("usergroups", []) + name_id_map = {d["name"]: d["id"] for d in all_group_data} + handle_id_map = {d["handle"]: d["id"] for d in all_group_data} + for given_name in given_names: + group_id = name_id_map.get(given_name) or handle_id_map.get( + given_name.lstrip("@") + ) + if not group_id: + failed_to_find.append(given_name) + continue + try: response = client.usergroups_users_list(usergroup=group_id) - user_ids.extend(response["users"]) - else: - failed_to_find.append(group_name) - except Exception as e: - logger.error(f"Error fetching user IDs for group {group_name}: {str(e)}") - failed_to_find.append(group_name) + if isinstance(response.data, dict): + user_ids.extend(response.data.get("users", [])) + else: + failed_to_find.append(given_name) + except Exception as e: + logger.error(f"Error fetching user group ids: {str(e)}") + failed_to_find.append(given_name) + except Exception as e: + logger.error(f"Error fetching user groups: {str(e)}") + failed_to_find = given_names return user_ids, failed_to_find -def fetch_groupids_from_names( - names: list[str], client: WebClient +def fetch_group_ids_from_names( + given_names: list[str], client: WebClient ) -> tuple[list[str], list[str]]: - group_ids: set[str] = set() + group_data: list[str] = [] failed_to_find: 
list[str] = [] try: response = client.usergroups_list() - if response.get("ok") and "usergroups" in response.data: - all_groups_dicts = response.data["usergroups"] # type: ignore - name_id_map = {d["name"]: d["id"] for d in all_groups_dicts} - handle_id_map = {d["handle"]: d["id"] for d in all_groups_dicts} - for group in names: - if group in name_id_map: - group_ids.add(name_id_map[group]) - elif group in handle_id_map: - group_ids.add(handle_id_map[group]) - else: - failed_to_find.append(group) - else: - # Most likely a Slack App scope issue + if not isinstance(response.data, dict): logger.error("Error fetching user groups") + return group_data, given_names + + all_group_data = response.data.get("usergroups", []) + + name_id_map = {d["name"]: d["id"] for d in all_group_data} + handle_id_map = {d["handle"]: d["id"] for d in all_group_data} + + for given_name in given_names: + id = handle_id_map.get(given_name.lstrip("@")) + id = id or name_id_map.get(given_name) + if id: + group_data.append(id) + else: + failed_to_find.append(given_name) except Exception as e: + failed_to_find = given_names logger.error(f"Error fetching user groups: {str(e)}") - return list(group_ids), failed_to_find + return group_data, failed_to_find def fetch_user_semantic_id_from_id( diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py index e2208266c84..3cb991dd43b 100644 --- a/backend/danswer/db/chat.py +++ b/backend/danswer/db/chat.py @@ -3,15 +3,20 @@ from datetime import timedelta from uuid import UUID +from sqlalchemy import and_ from sqlalchemy import delete +from sqlalchemy import desc +from sqlalchemy import func from sqlalchemy import nullsfirst from sqlalchemy import or_ from sqlalchemy import select +from sqlalchemy import update from sqlalchemy.exc import MultipleResultsFound from sqlalchemy.orm import joinedload from sqlalchemy.orm import Session from danswer.auth.schemas import UserRole +from danswer.chat.models import DocumentRelevance from danswer.configs.chat_configs import HARD_DELETE_CHATS from danswer.configs.constants import MessageType from danswer.db.models import ChatMessage @@ -34,6 +39,7 @@ from danswer.tools.tool_runner import ToolCallFinalResult from danswer.utils.logger import setup_logger + logger = setup_logger() @@ -81,20 +87,53 @@ def get_chat_sessions_by_slack_thread_id( return db_session.scalars(stmt).all() +def get_first_messages_for_chat_sessions( + chat_session_ids: list[int], db_session: Session +) -> dict[int, str]: + subquery = ( + select(ChatMessage.chat_session_id, func.min(ChatMessage.id).label("min_id")) + .where( + and_( + ChatMessage.chat_session_id.in_(chat_session_ids), + ChatMessage.message_type == MessageType.USER, # Select USER messages + ) + ) + .group_by(ChatMessage.chat_session_id) + .subquery() + ) + + query = select(ChatMessage.chat_session_id, ChatMessage.message).join( + subquery, + (ChatMessage.chat_session_id == subquery.c.chat_session_id) + & (ChatMessage.id == subquery.c.min_id), + ) + + first_messages = db_session.execute(query).all() + return dict([(row.chat_session_id, row.message) for row in first_messages]) + + def get_chat_sessions_by_user( user_id: UUID | None, deleted: bool | None, db_session: Session, - include_one_shot: bool = False, + only_one_shot: bool = False, + limit: int = 50, ) -> list[ChatSession]: stmt = select(ChatSession).where(ChatSession.user_id == user_id) - if not include_one_shot: + if only_one_shot: + stmt = stmt.where(ChatSession.one_shot.is_(True)) + else: stmt = stmt.where(ChatSession.one_shot.is_(False)) + 
stmt = stmt.order_by(desc(ChatSession.time_created)) + if deleted is not None: stmt = stmt.where(ChatSession.deleted == deleted) + if limit: + stmt = stmt.limit(limit) + result = db_session.execute(stmt) chat_sessions = result.scalars().all() @@ -111,6 +150,12 @@ def delete_search_doc_message_relationship( db_session.commit() +def delete_tool_call_for_message_id(message_id: int, db_session: Session) -> None: + stmt = delete(ToolCall).where(ToolCall.message_id == message_id) + db_session.execute(stmt) + db_session.commit() + + def delete_orphaned_search_docs(db_session: Session) -> None: orphaned_docs = ( db_session.query(SearchDoc) @@ -134,6 +179,7 @@ def delete_messages_and_files_from_chat_session( ).fetchall() for id, files in messages_with_files: + delete_tool_call_for_message_id(message_id=id, db_session=db_session) delete_search_doc_message_relationship(message_id=id, db_session=db_session) for file_info in files or {}: lobj_name = file_info.get("id") @@ -275,6 +321,20 @@ def get_chat_messages_by_sessions( return db_session.execute(stmt).scalars().all() +def get_search_docs_for_chat_message( + chat_message_id: int, db_session: Session +) -> list[SearchDoc]: + stmt = ( + select(SearchDoc) + .join( + ChatMessage__SearchDoc, ChatMessage__SearchDoc.search_doc_id == SearchDoc.id + ) + .where(ChatMessage__SearchDoc.chat_message_id == chat_message_id) + ) + + return list(db_session.scalars(stmt).all()) + + def get_chat_messages_by_session( chat_session_id: int, user_id: UUID | None, @@ -295,8 +355,6 @@ def get_chat_messages_by_session( if prefetch_tool_calls: stmt = stmt.options(joinedload(ChatMessage.tool_calls)) - - if prefetch_tool_calls: result = db_session.scalars(stmt).unique().all() else: result = db_session.scalars(stmt).all() @@ -339,6 +397,34 @@ def get_or_create_root_message( return new_root_message +def reserve_message_id( + db_session: Session, + chat_session_id: int, + parent_message: int, + message_type: MessageType, +) -> int: + # Create an empty chat message + empty_message = ChatMessage( + chat_session_id=chat_session_id, + parent_message=parent_message, + latest_child_message=None, + message="", + token_count=0, + message_type=message_type, + ) + + # Add the empty message to the session + db_session.add(empty_message) + + # Flush the session to get an ID for the new chat message + db_session.flush() + + # Get the ID of the newly created message + new_id = empty_message.id + + return new_id + + def create_new_chat_message( chat_session_id: int, parent_message: ChatMessage, @@ -356,29 +442,54 @@ def create_new_chat_message( citations: dict[int, int] | None = None, tool_calls: list[ToolCall] | None = None, commit: bool = True, + reserved_message_id: int | None = None, + overridden_model: str | None = None, ) -> ChatMessage: - new_chat_message = ChatMessage( - chat_session_id=chat_session_id, - parent_message=parent_message.id, - latest_child_message=None, - message=message, - rephrased_query=rephrased_query, - prompt_id=prompt_id, - token_count=token_count, - message_type=message_type, - citations=citations, - files=files, - tool_calls=tool_calls if tool_calls else [], - error=error, - alternate_assistant_id=alternate_assistant_id, - ) + if reserved_message_id is not None: + # Edit existing message + existing_message = db_session.query(ChatMessage).get(reserved_message_id) + if existing_message is None: + raise ValueError(f"No message found with id {reserved_message_id}") + + existing_message.chat_session_id = chat_session_id + existing_message.parent_message = 
parent_message.id + existing_message.message = message + existing_message.rephrased_query = rephrased_query + existing_message.prompt_id = prompt_id + existing_message.token_count = token_count + existing_message.message_type = message_type + existing_message.citations = citations + existing_message.files = files + existing_message.tool_calls = tool_calls if tool_calls else [] + existing_message.error = error + existing_message.alternate_assistant_id = alternate_assistant_id + existing_message.overridden_model = overridden_model + + new_chat_message = existing_message + else: + # Create new message + new_chat_message = ChatMessage( + chat_session_id=chat_session_id, + parent_message=parent_message.id, + latest_child_message=None, + message=message, + rephrased_query=rephrased_query, + prompt_id=prompt_id, + token_count=token_count, + message_type=message_type, + citations=citations, + files=files, + tool_calls=tool_calls if tool_calls else [], + error=error, + alternate_assistant_id=alternate_assistant_id, + overridden_model=overridden_model, + ) + db_session.add(new_chat_message) # SQL Alchemy will propagate this to update the reference_docs' foreign keys if reference_docs: new_chat_message.search_docs = reference_docs - db_session.add(new_chat_message) - # Flush the session to get an ID for the new chat message db_session.flush() @@ -484,6 +595,27 @@ def get_doc_query_identifiers_from_model( return doc_query_identifiers +def update_search_docs_table_with_relevance( + db_session: Session, + reference_db_search_docs: list[SearchDoc], + relevance_summary: DocumentRelevance, +) -> None: + for search_doc in reference_db_search_docs: + relevance_data = relevance_summary.relevance_summaries.get( + search_doc.document_id + ) + if relevance_data is not None: + db_session.execute( + update(SearchDoc) + .where(SearchDoc.id == search_doc.id) + .values( + is_relevant=relevance_data.relevant, + relevance_explanation=relevance_data.content, + ) + ) + db_session.commit() + + def create_db_search_doc( server_search_doc: ServerSearchDoc, db_session: Session, @@ -498,6 +630,8 @@ def create_db_search_doc( boost=server_search_doc.boost, hidden=server_search_doc.hidden, doc_metadata=server_search_doc.metadata, + is_relevant=server_search_doc.is_relevant, + relevance_explanation=server_search_doc.relevance_explanation, # For docs further down that aren't reranked, we can't use the retrieval score score=server_search_doc.score or 0.0, match_highlights=server_search_doc.match_highlights, @@ -509,7 +643,6 @@ def create_db_search_doc( db_session.add(db_search_doc) db_session.commit() - return db_search_doc @@ -538,6 +671,8 @@ def translate_db_search_doc_to_server_search_doc( match_highlights=( db_search_doc.match_highlights if not remove_doc_content else [] ), + relevance_explanation=db_search_doc.relevance_explanation, + is_relevant=db_search_doc.is_relevant, updated_at=db_search_doc.updated_at if not remove_doc_content else None, primary_owners=db_search_doc.primary_owners if not remove_doc_content else [], secondary_owners=( @@ -561,9 +696,11 @@ def get_retrieval_docs_from_chat_message( def translate_db_message_to_chat_message_detail( - chat_message: ChatMessage, remove_doc_content: bool = False + chat_message: ChatMessage, + remove_doc_content: bool = False, ) -> ChatMessageDetail: chat_msg_detail = ChatMessageDetail( + chat_session_id=chat_message.chat_session_id, message_id=chat_message.id, parent_message=chat_message.parent_message, latest_child_message=chat_message.latest_child_message, @@ -585,6 +722,7 @@ 
def translate_db_message_to_chat_message_detail( for tool_call in chat_message.tool_calls ], alternate_assistant_id=chat_message.alternate_assistant_id, + overridden_model=chat_message.overridden_model, ) return chat_msg_detail diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py index 2e4b1ed4c3e..89e6977103e 100644 --- a/backend/danswer/db/connector.py +++ b/backend/danswer/db/connector.py @@ -1,7 +1,7 @@ from typing import cast -from fastapi import HTTPException from sqlalchemy import and_ +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import select from sqlalchemy.orm import aliased @@ -11,6 +11,7 @@ from danswer.configs.constants import DocumentSource from danswer.connectors.models import InputType from danswer.db.models import Connector +from danswer.db.models import ConnectorCredentialPair from danswer.db.models import IndexAttempt from danswer.server.documents.models import ConnectorBase from danswer.server.documents.models import ObjectCreationIdResponse @@ -20,19 +21,24 @@ logger = setup_logger() +def check_connectors_exist(db_session: Session) -> bool: + # Connector 0 is created on server startup as a default for ingestion + # it will always exist and we don't need to count it for this + stmt = select(exists(Connector).where(Connector.id > 0)) + result = db_session.execute(stmt) + return result.scalar() or False + + def fetch_connectors( db_session: Session, sources: list[DocumentSource] | None = None, input_types: list[InputType] | None = None, - disabled_status: bool | None = None, ) -> list[Connector]: stmt = select(Connector) if sources is not None: stmt = stmt.where(Connector.source.in_(sources)) if input_types is not None: stmt = stmt.where(Connector.input_type.in_(input_types)) - if disabled_status is not None: - stmt = stmt.where(Connector.disabled == disabled_status) results = db_session.scalars(stmt) return list(results.all()) @@ -69,8 +75,8 @@ def fetch_ingestion_connector_by_name( def create_connector( - connector_data: ConnectorBase, db_session: Session, + connector_data: ConnectorBase, ) -> ObjectCreationIdResponse: if connector_by_name_source_exists( connector_data.name, connector_data.source, db_session @@ -85,10 +91,8 @@ def create_connector( input_type=connector_data.input_type, connector_specific_config=connector_data.connector_specific_config, refresh_freq=connector_data.refresh_freq, - prune_freq=connector_data.prune_freq - if connector_data.prune_freq is not None - else DEFAULT_PRUNING_FREQ, - disabled=connector_data.disabled, + indexing_start=connector_data.indexing_start, + prune_freq=connector_data.prune_freq, ) db_session.add(connector) db_session.commit() @@ -122,33 +126,18 @@ def update_connector( if connector_data.prune_freq is not None else DEFAULT_PRUNING_FREQ ) - connector.disabled = connector_data.disabled db_session.commit() return connector -def disable_connector( - connector_id: int, - db_session: Session, -) -> StatusResponse[int]: - connector = fetch_connector_by_id(connector_id, db_session) - if connector is None: - raise HTTPException(status_code=404, detail="Connector does not exist") - - connector.disabled = True - db_session.commit() - return StatusResponse( - success=True, message="Connector deleted successfully", data=connector_id - ) - - def delete_connector( - connector_id: int, db_session: Session, + connector_id: int, ) -> StatusResponse[int]: - """Currently unused due to foreign key restriction from IndexAttempt - Use disable_connector instead""" + """Only used in special 
cases (e.g. a connector is in a bad state and we need to delete it). + Be VERY careful using this, as it could lead to a bad state if not used correctly. + """ connector = fetch_connector_by_id(connector_id, db_session) if connector is None: return StatusResponse( @@ -179,11 +168,9 @@ def fetch_latest_index_attempt_by_connector( latest_index_attempts: list[IndexAttempt] = [] if source: - connectors = fetch_connectors( - db_session, sources=[source], disabled_status=False - ) + connectors = fetch_connectors(db_session, sources=[source]) else: - connectors = fetch_connectors(db_session, disabled_status=False) + connectors = fetch_connectors(db_session) if not connectors: return [] @@ -191,7 +178,8 @@ def fetch_latest_index_attempt_by_connector( for connector in connectors: latest_index_attempt = ( db_session.query(IndexAttempt) - .filter(IndexAttempt.connector_id == connector.id) + .join(ConnectorCredentialPair) + .filter(ConnectorCredentialPair.connector_id == connector.id) .order_by(IndexAttempt.time_updated.desc()) .first() ) @@ -207,13 +195,11 @@ def fetch_latest_index_attempts_by_status( ) -> list[IndexAttempt]: subquery = ( db_session.query( - IndexAttempt.connector_id, - IndexAttempt.credential_id, + IndexAttempt.connector_credential_pair_id, IndexAttempt.status, func.max(IndexAttempt.time_updated).label("time_updated"), ) - .group_by(IndexAttempt.connector_id) - .group_by(IndexAttempt.credential_id) + .group_by(IndexAttempt.connector_credential_pair_id) .group_by(IndexAttempt.status) .subquery() ) @@ -223,12 +209,13 @@ def fetch_latest_index_attempts_by_status( query = db_session.query(IndexAttempt).join( alias, and_( - IndexAttempt.connector_id == alias.connector_id, - IndexAttempt.credential_id == alias.credential_id, + IndexAttempt.connector_credential_pair_id + == alias.connector_credential_pair_id, IndexAttempt.status == alias.status, IndexAttempt.time_updated == alias.time_updated, ), ) + return cast(list[IndexAttempt], query.all()) @@ -247,20 +234,29 @@ def fetch_unique_document_sources(db_session: Session) -> list[DocumentSource]: def create_initial_default_connector(db_session: Session) -> None: default_connector_id = 0 default_connector = fetch_connector_by_id(default_connector_id, db_session) - if default_connector is not None: if ( default_connector.source != DocumentSource.INGESTION_API or default_connector.input_type != InputType.LOAD_STATE or default_connector.refresh_freq is not None - or default_connector.disabled + or default_connector.name != "Ingestion API" + or default_connector.connector_specific_config != {} + or default_connector.prune_freq is not None ): - raise ValueError( - "DB is not in a valid initial state. " - "Default connector does not have expected values." + logger.warning( + "Default connector does not have expected values. Updating to proper state." 
) + # Ensure default connector has correct values + default_connector.source = DocumentSource.INGESTION_API + default_connector.input_type = InputType.LOAD_STATE + default_connector.refresh_freq = None + default_connector.name = "Ingestion API" + default_connector.connector_specific_config = {} + default_connector.prune_freq = None + db_session.commit() return + # Create a new default connector if it doesn't exist connector = Connector( id=default_connector_id, name="Ingestion API", diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index 314a31eddcd..a6848232caf 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -3,50 +3,154 @@ from fastapi import HTTPException from sqlalchemy import delete from sqlalchemy import desc +from sqlalchemy import exists +from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy.orm import aliased from sqlalchemy.orm import Session +from danswer.configs.constants import DocumentSource from danswer.db.connector import fetch_connector_by_id from danswer.db.credentials import fetch_credential_by_id +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import ConnectorCredentialPair -from danswer.db.models import EmbeddingModel from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus from danswer.db.models import IndexModelStatus +from danswer.db.models import SearchSettings from danswer.db.models import User +from danswer.db.models import User__UserGroup +from danswer.db.models import UserGroup__ConnectorCredentialPair +from danswer.db.models import UserRole from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger logger = setup_logger() +def _add_user_filters( + stmt: Select, user: User | None, get_editable: bool = True +) -> Select: + # If user is None, assume the user is an admin or auth is disabled + if user is None or user.role == UserRole.ADMIN: + return stmt + + UG__CCpair = aliased(UserGroup__ConnectorCredentialPair) + User__UG = aliased(User__UserGroup) + + """ + Here we select cc_pairs by relation: + User -> User__UserGroup -> UserGroup__ConnectorCredentialPair -> + ConnectorCredentialPair + """ + stmt = stmt.outerjoin(UG__CCpair).outerjoin( + User__UG, + User__UG.user_group_id == UG__CCpair.user_group_id, + ) + + """ + Filter cc_pairs by: + - if the user is in the user_group that owns the cc_pair + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out cc_pairs that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all cc_pairs in the groups the user is a curator + for (as well as public cc_pairs) + """ + where_clause = User__UG.user_id == user.id + if user.role == UserRole.CURATOR and get_editable: + where_clause &= User__UG.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id) + if user.role == UserRole.CURATOR: + user_groups = user_groups.where( + User__UserGroup.is_curator == True # noqa: E712 + ) + where_clause &= ( + ~exists() + .where(UG__CCpair.cc_pair_id == ConnectorCredentialPair.id) + .where(~UG__CCpair.user_group_id.in_(user_groups)) + .correlate(ConnectorCredentialPair) + ) + else: + where_clause |= ConnectorCredentialPair.is_public == True # noqa: E712 + + return stmt.where(where_clause)
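# Minimal usage sketch of the _add_user_filters helper above (an editor illustration,
# not a line of this diff): it narrows a ConnectorCredentialPair select based on the
# caller's role. Admins (or user=None) bypass the filter entirely; non-admin users are
# limited to cc_pairs owned by user groups they belong to, with get_editable=True
# further requiring curator rights over those groups and get_editable=False also
# admitting public cc_pairs. The function name fetch_editable_cc_pairs_sketch and the
# variable curator_user are hypothetical; the pattern mirrors
# get_connector_credential_pairs defined just below.
from sqlalchemy import select
from sqlalchemy.orm import Session

from danswer.db.models import ConnectorCredentialPair
from danswer.db.models import User


def fetch_editable_cc_pairs_sketch(
    db_session: Session, curator_user: User | None
) -> list[ConnectorCredentialPair]:
    stmt = select(ConnectorCredentialPair).distinct()
    # apply the role/group visibility rules described in the comment block above
    stmt = _add_user_filters(stmt, curator_user, get_editable=True)
    return list(db_session.scalars(stmt).all())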
+ + def get_connector_credential_pairs( - db_session: Session, include_disabled: bool = True + db_session: Session, + include_disabled: bool = True, + user: User | None = None, + get_editable: bool = True, + ids: list[int] | None = None, ) -> list[ConnectorCredentialPair]: - stmt = select(ConnectorCredentialPair) + stmt = select(ConnectorCredentialPair).distinct() + stmt = _add_user_filters(stmt, user, get_editable) if not include_disabled: - stmt = stmt.where(ConnectorCredentialPair.connector.disabled == False) # noqa + stmt = stmt.where( + ConnectorCredentialPair.status == ConnectorCredentialPairStatus.ACTIVE + ) # noqa + if ids: + stmt = stmt.where(ConnectorCredentialPair.id.in_(ids)) results = db_session.scalars(stmt) return list(results.all()) +def get_cc_pair_groups_for_ids( + db_session: Session, + cc_pair_ids: list[int], + user: User | None = None, + get_editable: bool = True, +) -> list[UserGroup__ConnectorCredentialPair]: + stmt = select(UserGroup__ConnectorCredentialPair).distinct() + stmt = stmt.outerjoin( + ConnectorCredentialPair, + UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id, + ) + stmt = _add_user_filters(stmt, user, get_editable) + stmt = stmt.where(UserGroup__ConnectorCredentialPair.cc_pair_id.in_(cc_pair_ids)) + return list(db_session.scalars(stmt).all()) + + def get_connector_credential_pair( connector_id: int, credential_id: int, db_session: Session, + user: User | None = None, + get_editable: bool = True, ) -> ConnectorCredentialPair | None: stmt = select(ConnectorCredentialPair) + stmt = _add_user_filters(stmt, user, get_editable) stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id) stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id) result = db_session.execute(stmt) return result.scalar_one_or_none() +def get_connector_credential_source_from_id( + cc_pair_id: int, + db_session: Session, + user: User | None = None, + get_editable: bool = True, +) -> DocumentSource | None: + stmt = select(ConnectorCredentialPair) + stmt = _add_user_filters(stmt, user, get_editable) + stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id) + result = db_session.execute(stmt) + cc_pair = result.scalar_one_or_none() + return cc_pair.connector.source if cc_pair else None + + def get_connector_credential_pair_from_id( cc_pair_id: int, db_session: Session, + user: User | None = None, + get_editable: bool = True, ) -> ConnectorCredentialPair | None: - stmt = select(ConnectorCredentialPair) + stmt = select(ConnectorCredentialPair).distinct() + stmt = _add_user_filters(stmt, user, get_editable) stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id) result = db_session.execute(stmt) return result.scalar_one_or_none() @@ -55,12 +159,12 @@ def get_connector_credential_pair_from_id( def get_last_successful_attempt_time( connector_id: int, credential_id: int, - embedding_model: EmbeddingModel, + search_settings: SearchSettings, db_session: Session, ) -> float: """Gets the timestamp of the last successful index run stored in the CC Pair row in the database""" - if embedding_model.status == IndexModelStatus.PRESENT: + if search_settings.status == IndexModelStatus.PRESENT: connector_credential_pair = get_connector_credential_pair( connector_id, credential_id, db_session ) @@ -75,26 +179,78 @@ def get_last_successful_attempt_time( # For Secondary Index we don't keep track of the latest success, so have to calculate it live attempt = ( db_session.query(IndexAttempt) + .join( + ConnectorCredentialPair, + 
IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id, + ) .filter( - IndexAttempt.connector_id == connector_id, - IndexAttempt.credential_id == credential_id, - IndexAttempt.embedding_model_id == embedding_model.id, + ConnectorCredentialPair.connector_id == connector_id, + ConnectorCredentialPair.credential_id == credential_id, + IndexAttempt.search_settings_id == search_settings.id, IndexAttempt.status == IndexingStatus.SUCCESS, ) .order_by(IndexAttempt.time_started.desc()) .first() ) - if not attempt or not attempt.time_started: + connector = fetch_connector_by_id(connector_id, db_session) + if connector and connector.indexing_start: + return connector.indexing_start.timestamp() return 0.0 return attempt.time_started.timestamp() +"""Updates""" + + +def _update_connector_credential_pair( + db_session: Session, + cc_pair: ConnectorCredentialPair, + status: ConnectorCredentialPairStatus | None = None, + net_docs: int | None = None, + run_dt: datetime | None = None, +) -> None: + # simply don't update last_successful_index_time if run_dt is not specified + # at worst, this would result in re-indexing documents that were already indexed + if run_dt is not None: + cc_pair.last_successful_index_time = run_dt + if net_docs is not None: + cc_pair.total_docs_indexed += net_docs + if status is not None: + cc_pair.status = status + db_session.commit() + + +def update_connector_credential_pair_from_id( + db_session: Session, + cc_pair_id: int, + status: ConnectorCredentialPairStatus | None = None, + net_docs: int | None = None, + run_dt: datetime | None = None, +) -> None: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: + logger.warning( + f"Attempted to update pair for Connector Credential Pair '{cc_pair_id}'" + f" but it does not exist" + ) + return + + _update_connector_credential_pair( + db_session=db_session, + cc_pair=cc_pair, + status=status, + net_docs=net_docs, + run_dt=run_dt, + ) + + def update_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, + status: ConnectorCredentialPairStatus | None = None, net_docs: int | None = None, run_dt: datetime | None = None, ) -> None: @@ -105,13 +261,14 @@ def update_connector_credential_pair( f"and credential id {credential_id}" ) return - # simply don't update last_successful_index_time if run_dt is not specified - # at worst, this would result in re-indexing documents that were already indexed - if run_dt is not None: - cc_pair.last_successful_index_time = run_dt - if net_docs is not None: - cc_pair.total_docs_indexed += net_docs - db_session.commit() + + _update_connector_credential_pair( + db_session=db_session, + cc_pair=cc_pair, + status=status, + net_docs=net_docs, + run_dt=run_dt, + ) def delete_connector_credential_pair__no_commit( @@ -142,19 +299,35 @@ def associate_default_cc_pair(db_session: Session) -> None: connector_id=0, credential_id=0, name="DefaultCCPair", + status=ConnectorCredentialPairStatus.ACTIVE, + is_public=True, ) db_session.add(association) db_session.commit() +def _relate_groups_to_cc_pair__no_commit( + db_session: Session, + cc_pair_id: int, + user_group_ids: list[int], +) -> None: + for group_id in user_group_ids: + db_session.add( + UserGroup__ConnectorCredentialPair( + user_group_id=group_id, cc_pair_id=cc_pair_id + ) + ) + + def add_credential_to_connector( + db_session: Session, + user: User | None, connector_id: int, credential_id: int, cc_pair_name: str | None, is_public: bool, - user: User | None, - db_session: Session, 
-) -> StatusResponse[int]: + groups: list[int] | None, +) -> StatusResponse: connector = fetch_connector_by_id(connector_id, db_session) credential = fetch_credential_by_id(credential_id, user, db_session) @@ -186,15 +359,25 @@ def add_credential_to_connector( connector_id=connector_id, credential_id=credential_id, name=cc_pair_name, + status=ConnectorCredentialPairStatus.ACTIVE, is_public=is_public, ) db_session.add(association) + db_session.flush() # make sure the association has an id + + if groups: + _relate_groups_to_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=association.id, + user_group_ids=groups, + ) + db_session.commit() return StatusResponse( - success=True, - message=f"New Credential {credential_id} added to Connector", - data=connector_id, + success=False, + message=f"Connector already has Credential {credential_id}", + data=association.id, ) @@ -216,13 +399,12 @@ def remove_credential_from_connector( detail="Credential does not exist or does not belong to user", ) - association = ( - db_session.query(ConnectorCredentialPair) - .filter( - ConnectorCredentialPair.connector_id == connector_id, - ConnectorCredentialPair.credential_id == credential_id, - ) - .one_or_none() + association = get_connector_credential_pair( + connector_id=connector_id, + credential_id=credential_id, + db_session=db_session, + user=user, + get_editable=True, ) if association is not None: @@ -241,6 +423,12 @@ def remove_credential_from_connector( ) +def fetch_connector_credential_pairs( + db_session: Session, +) -> list[ConnectorCredentialPair]: + return db_session.query(ConnectorCredentialPair).all() + + def resync_cc_pair( cc_pair: ConnectorCredentialPair, db_session: Session, @@ -253,11 +441,15 @@ def find_latest_index_attempt( ) -> IndexAttempt | None: query = ( db_session.query(IndexAttempt) - .join(EmbeddingModel, IndexAttempt.embedding_model_id == EmbeddingModel.id) + .join( + ConnectorCredentialPair, + IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id, + ) + .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id) .filter( - IndexAttempt.connector_id == connector_id, - IndexAttempt.credential_id == credential_id, - EmbeddingModel.status == IndexModelStatus.PRESENT, + ConnectorCredentialPair.connector_id == connector_id, + ConnectorCredentialPair.credential_id == credential_id, + SearchSettings.status == IndexModelStatus.PRESENT, ) ) diff --git a/backend/danswer/db/credentials.py b/backend/danswer/db/credentials.py index c37bc59346c..abab904cc48 100644 --- a/backend/danswer/db/credentials.py +++ b/backend/danswer/db/credentials.py @@ -1,61 +1,150 @@ from typing import Any +from sqlalchemy import exists from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy import update from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import and_ from sqlalchemy.sql.expression import or_ from danswer.auth.schemas import UserRole +from danswer.configs.constants import DocumentSource from danswer.connectors.gmail.constants import ( GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) +from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential +from danswer.db.models import Credential__UserGroup +from danswer.db.models import DocumentByConnectorCredentialPair from danswer.db.models import User +from danswer.db.models import User__UserGroup from danswer.server.documents.models import 
CredentialBase +from danswer.server.documents.models import CredentialDataUpdateRequest from danswer.utils.logger import setup_logger logger = setup_logger() +# The credentials for these sources are not real so +# permissions are not enforced for them +CREDENTIAL_PERMISSIONS_TO_IGNORE = { + DocumentSource.FILE, + DocumentSource.WEB, + DocumentSource.NOT_APPLICABLE, + DocumentSource.GOOGLE_SITES, + DocumentSource.WIKIPEDIA, + DocumentSource.MEDIAWIKI, +} -def _attach_user_filters( - stmt: Select[tuple[Credential]], + +def _add_user_filters( + stmt: Select, user: User | None, assume_admin: bool = False, # Used with API key + get_editable: bool = True, ) -> Select: """Attaches filters to the statement to ensure that the user can only access the appropriate credentials""" - if user: - if user.role == UserRole.ADMIN: + if not user: + if assume_admin: + # apply admin filters minus the user_id check stmt = stmt.where( or_( - Credential.user_id == user.id, Credential.user_id.is_(None), Credential.admin_public == True, # noqa: E712 + Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE), ) ) - else: - stmt = stmt.where(Credential.user_id == user.id) - elif assume_admin: - stmt = stmt.where( + return stmt + + if user.role == UserRole.ADMIN: + # Admins can access all credentials that are public or owned by them + # or are not associated with any user + return stmt.where( or_( + Credential.user_id == user.id, Credential.user_id.is_(None), Credential.admin_public == True, # noqa: E712 + Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE), ) ) + if user.role == UserRole.BASIC: + # Basic users can only access credentials that are owned by them + return stmt.where(Credential.user_id == user.id) + + """ + THIS PART IS FOR CURATORS AND GLOBAL CURATORS + Here we select cc_pairs by relation: + User -> User__UserGroup -> Credential__UserGroup -> Credential + """ + stmt = stmt.outerjoin(Credential__UserGroup).outerjoin( + User__UserGroup, + User__UserGroup.user_group_id == Credential__UserGroup.user_group_id, + ) + """ + Filter Credentials by: + - if the user is in the user_group that owns the Credential + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out Credentials that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all Credentials in the groups the user is a curator + for (as well as public Credentials) + - if we are not editing, we return all Credentials directly connected to the user + """ + where_clause = User__UserGroup.user_id == user.id + if user.role == UserRole.CURATOR: + where_clause &= User__UserGroup.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UserGroup.user_group_id).where( + User__UserGroup.user_id == user.id + ) + if user.role == UserRole.CURATOR: + user_groups = user_groups.where( + User__UserGroup.is_curator == True # noqa: E712 + ) + where_clause &= ( + ~exists() + .where(Credential__UserGroup.credential_id == Credential.id) + .where(~Credential__UserGroup.user_group_id.in_(user_groups)) + .correlate(Credential) + ) + else: + where_clause |= Credential.curator_public == True # noqa: E712 + where_clause |= Credential.user_id == user.id # noqa: E712 + + where_clause |= Credential.source.in_(CREDENTIAL_PERMISSIONS_TO_IGNORE) + + return stmt.where(where_clause) - return stmt + +def _relate_credential_to_user_groups__no_commit( + db_session: Session, + credential_id: int, + user_group_ids: list[int], +) -> None: 
+ credential_user_groups = [] + for group_id in user_group_ids: + credential_user_groups.append( + Credential__UserGroup( + credential_id=credential_id, + user_group_id=group_id, + ) + ) + db_session.add_all(credential_user_groups) def fetch_credentials( db_session: Session, user: User | None = None, + get_editable: bool = True, ) -> list[Credential]: stmt = select(Credential) - stmt = _attach_user_filters(stmt, user) + stmt = _add_user_filters(stmt, user, get_editable=get_editable) results = db_session.scalars(stmt) return list(results.all()) @@ -66,13 +155,78 @@ def fetch_credential_by_id( db_session: Session, assume_admin: bool = False, ) -> Credential | None: - stmt = select(Credential).where(Credential.id == credential_id) - stmt = _attach_user_filters(stmt, user, assume_admin=assume_admin) + stmt = select(Credential).distinct() + stmt = stmt.where(Credential.id == credential_id) + stmt = _add_user_filters(stmt, user, assume_admin=assume_admin) result = db_session.execute(stmt) credential = result.scalar_one_or_none() return credential +def fetch_credentials_by_source( + db_session: Session, + user: User | None, + document_source: DocumentSource | None = None, + get_editable: bool = True, +) -> list[Credential]: + base_query = select(Credential).where(Credential.source == document_source) + base_query = _add_user_filters(base_query, user, get_editable=get_editable) + credentials = db_session.execute(base_query).scalars().all() + return list(credentials) + + +def swap_credentials_connector( + new_credential_id: int, connector_id: int, user: User | None, db_session: Session +) -> ConnectorCredentialPair: + # Check if the user has permission to use the new credential + new_credential = fetch_credential_by_id(new_credential_id, user, db_session) + if not new_credential: + raise ValueError( + f"No Credential found with id {new_credential_id} or user doesn't have permission to use it" + ) + + # Existing pair + existing_pair = db_session.execute( + select(ConnectorCredentialPair).where( + ConnectorCredentialPair.connector_id == connector_id + ) + ).scalar_one_or_none() + + if not existing_pair: + raise ValueError( + f"No ConnectorCredentialPair found for connector_id {connector_id}" + ) + + # Check if the new credential is compatible with the connector + if new_credential.source != existing_pair.connector.source: + raise ValueError( + f"New credential source {new_credential.source} does not match connector source {existing_pair.connector.source}" + ) + + db_session.execute( + update(DocumentByConnectorCredentialPair) + .where( + and_( + DocumentByConnectorCredentialPair.connector_id == connector_id, + DocumentByConnectorCredentialPair.credential_id + == existing_pair.credential_id, + ) + ) + .values(credential_id=new_credential_id) + ) + + # Update the existing pair with the new credential + existing_pair.credential_id = new_credential_id + existing_pair.credential = new_credential + + # Commit the changes + db_session.commit() + + # Refresh the object to ensure all relationships are up-to-date + db_session.refresh(existing_pair) + return existing_pair + + def create_credential( credential_data: CredentialBase, user: User | None, @@ -82,13 +236,56 @@ def create_credential( credential_json=credential_data.credential_json, user_id=user.id if user else None, admin_public=credential_data.admin_public, + source=credential_data.source, + name=credential_data.name, + curator_public=credential_data.curator_public, ) db_session.add(credential) + db_session.flush() # This ensures the credential gets 
an ID + + _relate_credential_to_user_groups__no_commit( + db_session=db_session, + credential_id=credential.id, + user_group_ids=credential_data.groups, + ) + db_session.commit() return credential +def _cleanup_credential__user_group_relationships__no_commit( + db_session: Session, credential_id: int +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(Credential__UserGroup).filter( + Credential__UserGroup.credential_id == credential_id + ).delete(synchronize_session=False) + + +def alter_credential( + credential_id: int, + credential_data: CredentialDataUpdateRequest, + user: User, + db_session: Session, +) -> Credential | None: + # TODO: add user group relationship update + credential = fetch_credential_by_id(credential_id, user, db_session) + + if credential is None: + return None + + credential.name = credential_data.name + + # Update only the keys present in credential_data.credential_json + for key, value in credential_data.credential_json.items(): + credential.credential_json[key] = value + + credential.user_id = user.id if user is not None else None + db_session.commit() + return credential + + def update_credential( credential_id: int, credential_data: CredentialBase, @@ -135,6 +332,7 @@ def delete_credential( credential_id: int, user: User | None, db_session: Session, + force: bool = False, ) -> None: credential = fetch_credential_by_id(credential_id, user, db_session) if credential is None: @@ -142,6 +340,46 @@ def delete_credential( f"Credential by provided id {credential_id} does not exist or does not belong to user" ) + associated_connectors = ( + db_session.query(ConnectorCredentialPair) + .filter(ConnectorCredentialPair.credential_id == credential_id) + .all() + ) + + associated_doc_cc_pairs = ( + db_session.query(DocumentByConnectorCredentialPair) + .filter(DocumentByConnectorCredentialPair.credential_id == credential_id) + .all() + ) + + if associated_connectors or associated_doc_cc_pairs: + if force: + logger.warning( + f"Force deleting credential {credential_id} and its associated records" + ) + + # Delete DocumentByConnectorCredentialPair records first + for doc_cc_pair in associated_doc_cc_pairs: + db_session.delete(doc_cc_pair) + + # Then delete ConnectorCredentialPair records + for connector in associated_connectors: + db_session.delete(connector) + + # Commit these deletions before deleting the credential + db_session.flush() + else: + raise ValueError( + f"Cannot delete credential as it is still associated with " + f"{len(associated_connectors)} connector(s) and {len(associated_doc_cc_pairs)} document(s). 
" + ) + + if force: + logger.warning(f"Force deleting credential {credential_id}") + else: + logger.notice(f"Deleting credential {credential_id}") + + _cleanup_credential__user_group_relationships__no_commit(db_session, credential_id) db_session.delete(credential) db_session.commit() diff --git a/backend/danswer/db/deletion_attempt.py b/backend/danswer/db/deletion_attempt.py index b66e6f58520..0312047250b 100644 --- a/backend/danswer/db/deletion_attempt.py +++ b/backend/danswer/db/deletion_attempt.py @@ -1,9 +1,9 @@ from sqlalchemy.orm import Session -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.index_attempt import get_last_attempt from danswer.db.models import ConnectorCredentialPair from danswer.db.models import IndexingStatus +from danswer.db.search_settings import get_current_search_settings def check_deletion_attempt_is_allowed( @@ -13,7 +13,7 @@ def check_deletion_attempt_is_allowed( ) -> str | None: """ To be deletable: - (1) connector should be disabled + (1) connector should be paused (2) there should be no in-progress/planned index attempts Returns an error message if the deletion attempt is not allowed, otherwise None. @@ -23,17 +23,17 @@ def check_deletion_attempt_is_allowed( f"'{connector_credential_pair.credential_id}' is not deletable." ) - if not connector_credential_pair.connector.disabled: + if connector_credential_pair.status.is_active(): return base_error_msg + " Connector must be paused." connector_id = connector_credential_pair.connector_id credential_id = connector_credential_pair.credential_id - current_embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) last_indexing = get_last_attempt( connector_id=connector_id, credential_id=credential_id, - embedding_model_id=current_embedding_model.id, + search_settings_id=search_settings.id, db_session=db_session, ) diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index befb8675748..77ea4e3dd9d 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -7,6 +7,7 @@ from sqlalchemy import and_ from sqlalchemy import delete +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import or_ from sqlalchemy import select @@ -16,6 +17,7 @@ from sqlalchemy.orm import Session from danswer.configs.constants import DEFAULT_BOOST +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.feedback import delete_document_feedback_for_documents__no_commit from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential @@ -30,6 +32,12 @@ logger = setup_logger() +def check_docs_exist(db_session: Session) -> bool: + stmt = select(exists(DbDocument)) + result = db_session.execute(stmt) + return result.scalar() or False + + def get_documents_for_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, limit: int | None = None ) -> Sequence[DbDocument]: @@ -103,36 +111,19 @@ def get_document_cnts_for_cc_pairs( def get_acccess_info_for_documents( db_session: Session, document_ids: list[str], - cc_pair_to_delete: ConnectorCredentialPairIdentifier | None = None, ) -> Sequence[tuple[str, list[UUID | None], bool]]: """Gets back all relevant access info for the given documents. This includes the user_ids for cc pairs that the document is associated with + whether any of the associated cc pairs are intending to make the document globally public. 
- - If `cc_pair_to_delete` is specified, gets the above access info as if that - pair had been deleted. This is needed since we want to delete from the Vespa - before deleting from Postgres to ensure that the state of Postgres never "loses" - documents that still exist in Vespa. """ - stmt = select( - DocumentByConnectorCredentialPair.id, - func.array_agg(Credential.user_id).label("user_ids"), - func.bool_or(ConnectorCredentialPair.is_public).label("public_doc"), - ).where(DocumentByConnectorCredentialPair.id.in_(document_ids)) - - # pretend that the specified cc pair doesn't exist - if cc_pair_to_delete: - stmt = stmt.where( - and_( - DocumentByConnectorCredentialPair.connector_id - != cc_pair_to_delete.connector_id, - DocumentByConnectorCredentialPair.credential_id - != cc_pair_to_delete.credential_id, - ) - ) - stmt = ( - stmt.join( + select( + DocumentByConnectorCredentialPair.id, + func.array_agg(Credential.user_id).label("user_ids"), + func.bool_or(ConnectorCredentialPair.is_public).label("public_doc"), + ) + .where(DocumentByConnectorCredentialPair.id.in_(document_ids)) + .join( Credential, DocumentByConnectorCredentialPair.credential_id == Credential.id, ) @@ -145,6 +136,9 @@ def get_acccess_info_for_documents( == ConnectorCredentialPair.credential_id, ), ) + # don't include CC pairs that are being deleted + # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them + .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING) .group_by(DocumentByConnectorCredentialPair.id) ) return db_session.execute(stmt).all() # type: ignore @@ -311,7 +305,7 @@ def acquire_document_locks(db_session: Session, document_ids: list[str]) -> bool _NUM_LOCK_ATTEMPTS = 10 -_LOCK_RETRY_DELAY = 30 +_LOCK_RETRY_DELAY = 10 @contextlib.contextmanager @@ -323,7 +317,7 @@ def prepare_to_modify_documents( called ahead of any modification to Vespa. Locks should be released by the caller as soon as updates are complete by finishing the transaction. - NOTE: only one commit is allowed within the context manager returned by this funtion. + NOTE: only one commit is allowed within the context manager returned by this function. Multiple commits will result in a sqlalchemy.exc.InvalidRequestError. NOTE: this function will commit any existing transaction. """ @@ -341,7 +335,9 @@ def prepare_to_modify_documents( yield transaction break except OperationalError as e: - logger.info(f"Failed to acquire locks for documents, retrying. Error: {e}") + logger.warning( + f"Failed to acquire locks for documents, retrying. 
Error: {e}" + ) time.sleep(retry_delay) diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 51064f78e2f..2de61a491f9 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -4,20 +4,79 @@ from sqlalchemy import and_ from sqlalchemy import delete +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import or_ +from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy.orm import aliased from sqlalchemy.orm import Session +from danswer.db.connector_credential_pair import get_cc_pair_groups_for_ids +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document from danswer.db.models import DocumentByConnectorCredentialPair from danswer.db.models import DocumentSet as DocumentSetDBModel from danswer.db.models import DocumentSet__ConnectorCredentialPair +from danswer.db.models import DocumentSet__UserGroup +from danswer.db.models import User +from danswer.db.models import User__UserGroup +from danswer.db.models import UserRole from danswer.server.features.document_set.models import DocumentSetCreationRequest from danswer.server.features.document_set.models import DocumentSetUpdateRequest +from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import fetch_versioned_implementation +logger = setup_logger() + + +def _add_user_filters( + stmt: Select, user: User | None, get_editable: bool = True +) -> Select: + # If user is None, assume the user is an admin or auth is disabled + if user is None or user.role == UserRole.ADMIN: + return stmt + + DocumentSet__UG = aliased(DocumentSet__UserGroup) + User__UG = aliased(User__UserGroup) + """ + Here we select cc_pairs by relation: + User -> User__UserGroup -> DocumentSet__UserGroup -> DocumentSet + """ + stmt = stmt.outerjoin(DocumentSet__UG).outerjoin( + User__UserGroup, + User__UserGroup.user_group_id == DocumentSet__UG.user_group_id, + ) + """ + Filter DocumentSets by: + - if the user is in the user_group that owns the DocumentSet + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out DocumentSets that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all DocumentSets in the groups the user is a curator + for (as well as public DocumentSets) + """ + where_clause = User__UserGroup.user_id == user.id + if user.role == UserRole.CURATOR and get_editable: + where_clause &= User__UserGroup.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id) + if user.role == UserRole.CURATOR: + user_groups = user_groups.where(User__UG.is_curator == True) # noqa: E712 + where_clause &= ( + ~exists() + .where(DocumentSet__UG.document_set_id == DocumentSetDBModel.id) + .where(~DocumentSet__UG.user_group_id.in_(user_groups)) + .correlate(DocumentSetDBModel) + ) + else: + where_clause |= DocumentSetDBModel.is_public == True # noqa: E712 + + return stmt.where(where_clause) + def _delete_document_set_cc_pairs__no_commit( db_session: Session, document_set_id: int, is_current: bool | None = None @@ -49,11 +108,15 @@ def delete_document_set_privacy__no_commit( def get_document_set_by_id( - db_session: Session, document_set_id: int + db_session: Session, + 
document_set_id: int, + user: User | None = None, + get_editable: bool = True, ) -> DocumentSetDBModel | None: - return db_session.scalar( - select(DocumentSetDBModel).where(DocumentSetDBModel.id == document_set_id) - ) + stmt = select(DocumentSetDBModel).distinct() + stmt = stmt.where(DocumentSetDBModel.id == document_set_id) + stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable) + return db_session.scalar(stmt) def get_document_set_by_name( @@ -85,6 +148,45 @@ def make_doc_set_private( raise NotImplementedError("Danswer MIT does not support private Document Sets") +def _check_if_cc_pairs_are_owned_by_groups( + db_session: Session, + cc_pair_ids: list[int], + group_ids: list[int], +) -> None: + """ + This function checks if the CC pairs are owned by the specified groups or public. + If not, it raises a ValueError. + """ + group_cc_pair_relationships = get_cc_pair_groups_for_ids( + db_session=db_session, + cc_pair_ids=cc_pair_ids, + ) + + group_cc_pair_relationships_set = { + (relationship.cc_pair_id, relationship.user_group_id) + for relationship in group_cc_pair_relationships + } + + missing_cc_pair_ids = [] + for cc_pair_id in cc_pair_ids: + for group_id in group_ids: + if (cc_pair_id, group_id) not in group_cc_pair_relationships_set: + missing_cc_pair_ids.append(cc_pair_id) + break + + if missing_cc_pair_ids: + cc_pairs = get_connector_credential_pairs( + db_session=db_session, + ids=missing_cc_pair_ids, + ) + for cc_pair in cc_pairs: + if not cc_pair.is_public: + raise ValueError( + f"Connector Credential Pair with ID: '{cc_pair.id}'" + " is not owned by the specified groups" + ) + + def insert_document_set( document_set_creation_request: DocumentSetCreationRequest, user_id: UUID | None, @@ -94,8 +196,12 @@ def insert_document_set( # It's cc-pairs in actuality but the UI displays this error raise ValueError("Cannot create a document set with no Connectors") - # start a transaction - db_session.begin() + if not document_set_creation_request.is_public: + _check_if_cc_pairs_are_owned_by_groups( + db_session=db_session, + cc_pair_ids=document_set_creation_request.cc_pair_ids, + group_ids=document_set_creation_request.groups or [], + ) try: new_document_set_row = DocumentSetDBModel( @@ -130,27 +236,36 @@ def insert_document_set( ) db_session.commit() - except: + except Exception as e: db_session.rollback() - raise + logger.error(f"Error creating document set: {e}") return new_document_set_row, ds_cc_pairs def update_document_set( - document_set_update_request: DocumentSetUpdateRequest, db_session: Session + db_session: Session, + document_set_update_request: DocumentSetUpdateRequest, + user: User | None = None, ) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]: if not document_set_update_request.cc_pair_ids: # It's cc-pairs in actuality but the UI displays this error raise ValueError("Cannot create a document set with no Connectors") - # start a transaction - db_session.begin() + if not document_set_update_request.is_public: + _check_if_cc_pairs_are_owned_by_groups( + db_session=db_session, + cc_pair_ids=document_set_update_request.cc_pair_ids, + group_ids=document_set_update_request.groups, + ) try: # update the description document_set_row = get_document_set_by_id( - db_session=db_session, document_set_id=document_set_update_request.id + db_session=db_session, + document_set_id=document_set_update_request.id, + user=user, + get_editable=True, ) if document_set_row is None: raise ValueError( @@ -228,20 +343,26 @@ def delete_document_set( def 
mark_document_set_as_to_be_deleted( - document_set_id: int, db_session: Session + db_session: Session, + document_set_id: int, + user: User | None = None, ) -> None: """Cleans up all document_set -> cc_pair relationships and marks the document set as needing an update. The actual document set row will be deleted by the background job which syncs these changes to Vespa.""" - # start a transaction - db_session.begin() try: document_set_row = get_document_set_by_id( - db_session=db_session, document_set_id=document_set_id + db_session=db_session, + document_set_id=document_set_id, + user=user, + get_editable=True, ) if document_set_row is None: - raise ValueError(f"No document set with ID: '{document_set_id}'") + error_msg = f"Document set with ID: '{document_set_id}' does not exist " + if user is not None: + error_msg += f"or is not editable by user with email: '{user.email}'" + raise ValueError(error_msg) if not document_set_row.is_up_to_date: raise ValueError( "Cannot delete document set while it is syncing. Please wait " @@ -270,37 +391,20 @@ def mark_document_set_as_to_be_deleted( raise -def mark_cc_pair__document_set_relationships_to_be_deleted__no_commit( - cc_pair_id: int, db_session: Session -) -> set[int]: - """Marks all CC Pair -> Document Set relationships for the specified - `cc_pair_id` as not current and returns the list of all document set IDs - affected. - - NOTE: rases a `ValueError` if any of the document sets are currently syncing - to avoid getting into a bad state.""" - document_set__cc_pair_relationships = db_session.scalars( - select(DocumentSet__ConnectorCredentialPair).where( +def delete_document_set_cc_pair_relationship__no_commit( + connector_id: int, credential_id: int, db_session: Session +) -> None: + """Deletes all rows from DocumentSet__ConnectorCredentialPair where the + connector_credential_pair_id matches the given cc_pair_id.""" + delete_stmt = delete(DocumentSet__ConnectorCredentialPair).where( + and_( + ConnectorCredentialPair.connector_id == connector_id, + ConnectorCredentialPair.credential_id == credential_id, DocumentSet__ConnectorCredentialPair.connector_credential_pair_id - == cc_pair_id + == ConnectorCredentialPair.id, ) - ).all() - - document_set_ids_touched: set[int] = set() - for document_set__cc_pair_relationship in document_set__cc_pair_relationships: - document_set__cc_pair_relationship.is_current = False - - if not document_set__cc_pair_relationship.document_set.is_up_to_date: - raise ValueError( - "Cannot delete CC pair while it is attached to a document set " - "that is syncing. Please wait for the document set to finish " - "syncing, and then try again." 
- ) - - document_set__cc_pair_relationship.document_set.is_up_to_date = False - document_set_ids_touched.add(document_set__cc_pair_relationship.document_set_id) - - return document_set_ids_touched + ) + db_session.execute(delete_stmt) def fetch_document_sets( @@ -357,29 +461,14 @@ def fetch_document_sets( ] -def fetch_all_document_sets(db_session: Session) -> Sequence[DocumentSetDBModel]: - """Used for Admin UI where they should have visibility into all document sets""" - return db_session.scalars(select(DocumentSetDBModel)).all() - - -def fetch_user_document_sets( - user_id: UUID | None, db_session: Session -) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]: - # If Auth is turned off, all document sets become visible - # document sets are not permission enforced, only for organizational purposes - # the documents themselves are permission enforced - if user_id is None: - return fetch_document_sets( - user_id=user_id, db_session=db_session, include_outdated=True - ) - - versioned_fetch_doc_sets_fn = fetch_versioned_implementation( - "danswer.db.document_set", "fetch_document_sets" - ) - - return versioned_fetch_doc_sets_fn( - user_id=user_id, db_session=db_session, include_outdated=True - ) +def fetch_all_document_sets_for_user( + db_session: Session, + user: User | None = None, + get_editable: bool = True, +) -> Sequence[DocumentSetDBModel]: + stmt = select(DocumentSetDBModel).distinct() + stmt = _add_user_filters(stmt, user, get_editable=get_editable) + return db_session.scalars(stmt).all() def fetch_documents_for_document_set_paginated( @@ -431,8 +520,10 @@ def fetch_documents_for_document_set_paginated( def fetch_document_sets_for_documents( - document_ids: list[str], db_session: Session + document_ids: list[str], + db_session: Session, ) -> Sequence[tuple[str, list[str]]]: + """Gives back a list of (document_id, list[document_set_names]) tuples""" stmt = ( select(Document.id, func.array_agg(DocumentSetDBModel.name)) .join( @@ -459,6 +550,10 @@ def fetch_document_sets_for_documents( Document.id == DocumentByConnectorCredentialPair.id, ) .where(Document.id.in_(document_ids)) + # don't include CC pairs that are being deleted + # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them + # as we can assume their document sets are no longer relevant + .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING) .where(DocumentSet__ConnectorCredentialPair.is_current == True) # noqa: E712 .group_by(Document.id) ) diff --git a/backend/danswer/db/embedding_model.py b/backend/danswer/db/embedding_model.py deleted file mode 100644 index ae2b98d514f..00000000000 --- a/backend/danswer/db/embedding_model.py +++ /dev/null @@ -1,115 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.orm import Session - -from danswer.configs.model_configs import ASYM_PASSAGE_PREFIX -from danswer.configs.model_configs import ASYM_QUERY_PREFIX -from danswer.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL -from danswer.configs.model_configs import DOC_EMBEDDING_DIM -from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL -from danswer.configs.model_configs import NORMALIZE_EMBEDDINGS -from danswer.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL -from danswer.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM -from danswer.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS -from danswer.db.models import EmbeddingModel -from danswer.db.models import IndexModelStatus -from 
danswer.indexing.models import EmbeddingModelDetail -from danswer.search.search_nlp_models import clean_model_name -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -def create_embedding_model( - model_details: EmbeddingModelDetail, - db_session: Session, - status: IndexModelStatus = IndexModelStatus.FUTURE, -) -> EmbeddingModel: - embedding_model = EmbeddingModel( - model_name=model_details.model_name, - model_dim=model_details.model_dim, - normalize=model_details.normalize, - query_prefix=model_details.query_prefix, - passage_prefix=model_details.passage_prefix, - status=status, - # Every single embedding model except the initial one from migrations has this name - # The initial one from migration is called "danswer_chunk" - index_name=f"danswer_chunk_{clean_model_name(model_details.model_name)}", - ) - - db_session.add(embedding_model) - db_session.commit() - - return embedding_model - - -def get_current_db_embedding_model(db_session: Session) -> EmbeddingModel: - query = ( - select(EmbeddingModel) - .where(EmbeddingModel.status == IndexModelStatus.PRESENT) - .order_by(EmbeddingModel.id.desc()) - ) - result = db_session.execute(query) - latest_model = result.scalars().first() - - if not latest_model: - raise RuntimeError("No embedding model selected, DB is not in a valid state") - - return latest_model - - -def get_secondary_db_embedding_model(db_session: Session) -> EmbeddingModel | None: - query = ( - select(EmbeddingModel) - .where(EmbeddingModel.status == IndexModelStatus.FUTURE) - .order_by(EmbeddingModel.id.desc()) - ) - result = db_session.execute(query) - latest_model = result.scalars().first() - - return latest_model - - -def update_embedding_model_status( - embedding_model: EmbeddingModel, new_status: IndexModelStatus, db_session: Session -) -> None: - embedding_model.status = new_status - db_session.commit() - - -def user_has_overridden_embedding_model() -> bool: - return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL - - -def get_old_default_embedding_model() -> EmbeddingModel: - is_overridden = user_has_overridden_embedding_model() - return EmbeddingModel( - model_name=( - DOCUMENT_ENCODER_MODEL - if is_overridden - else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL - ), - model_dim=( - DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM - ), - normalize=( - NORMALIZE_EMBEDDINGS - if is_overridden - else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS - ), - query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""), - passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""), - status=IndexModelStatus.PRESENT, - index_name="danswer_chunk", - ) - - -def get_new_default_embedding_model(is_present: bool) -> EmbeddingModel: - return EmbeddingModel( - model_name=DOCUMENT_ENCODER_MODEL, - model_dim=DOC_EMBEDDING_DIM, - normalize=NORMALIZE_EMBEDDINGS, - query_prefix=ASYM_QUERY_PREFIX, - passage_prefix=ASYM_PASSAGE_PREFIX, - status=IndexModelStatus.PRESENT if is_present else IndexModelStatus.FUTURE, - index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}", - ) diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index b8a7f858fd2..208dbdba3c1 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -1,9 +1,11 @@ import contextlib +import time from collections.abc import AsyncGenerator from collections.abc import Generator from datetime import datetime from typing import ContextManager +from sqlalchemy import event from sqlalchemy import text from sqlalchemy.engine import 
create_engine from sqlalchemy.engine import Engine @@ -14,11 +16,16 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool +from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS +from danswer.configs.app_configs import LOG_POSTGRES_LATENCY from danswer.configs.app_configs import POSTGRES_DB from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD +from danswer.configs.app_configs import POSTGRES_POOL_PRE_PING +from danswer.configs.app_configs import POSTGRES_POOL_RECYCLE from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER +from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME from danswer.utils.logger import setup_logger logger = setup_logger() @@ -26,12 +33,70 @@ SYNC_DB_API = "psycopg2" ASYNC_DB_API = "asyncpg" +POSTGRES_APP_NAME = ( + POSTGRES_UNKNOWN_APP_NAME # helps to diagnose open connections in postgres +) + # global so we don't create more than one engine per process # outside of being best practice, this is needed so we can properly pool # connections and not create a new pool on every request _SYNC_ENGINE: Engine | None = None _ASYNC_ENGINE: AsyncEngine | None = None +SessionFactory: sessionmaker[Session] | None = None + + +if LOG_POSTGRES_LATENCY: + # Function to log before query execution + @event.listens_for(Engine, "before_cursor_execute") + def before_cursor_execute( # type: ignore + conn, cursor, statement, parameters, context, executemany + ): + conn.info["query_start_time"] = time.time() + + # Function to log after query execution + @event.listens_for(Engine, "after_cursor_execute") + def after_cursor_execute( # type: ignore + conn, cursor, statement, parameters, context, executemany + ): + total_time = time.time() - conn.info["query_start_time"] + # don't spam TOO hard + if total_time > 0.1: + logger.debug( + f"Query Complete: {statement}\n\nTotal Time: {total_time:.4f} seconds" + ) + + +if LOG_POSTGRES_CONN_COUNTS: + # Global counter for connection checkouts and checkins + checkout_count = 0 + checkin_count = 0 + + @event.listens_for(Engine, "checkout") + def log_checkout(dbapi_connection, connection_record, connection_proxy): # type: ignore + global checkout_count + checkout_count += 1 + + active_connections = connection_proxy._pool.checkedout() + idle_connections = connection_proxy._pool.checkedin() + pool_size = connection_proxy._pool.size() + logger.debug( + "Connection Checkout\n" + f"Active Connections: {active_connections};\n" + f"Idle: {idle_connections};\n" + f"Pool Size: {pool_size};\n" + f"Total connection checkouts: {checkout_count}" + ) + + @event.listens_for(Engine, "checkin") + def log_checkin(dbapi_connection, connection_record): # type: ignore + global checkin_count + checkin_count += 1 + logger.debug(f"Total connection checkins: {checkin_count}") + + +"""END DEBUGGING LOGGING""" + def get_db_current_time(db_session: Session) -> datetime: """Get the current time from Postgres representing the start of the transaction @@ -52,17 +117,32 @@ def build_connection_string( host: str = POSTGRES_HOST, port: str = POSTGRES_PORT, db: str = POSTGRES_DB, + app_name: str | None = None, ) -> str: + if app_name: + return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}" + return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}" +def init_sqlalchemy_engine(app_name: str) -> None: + global POSTGRES_APP_NAME + POSTGRES_APP_NAME = app_name + + def 
get_sqlalchemy_engine() -> Engine: connect_args = {"sslmode": "disable"} global _SYNC_ENGINE if _SYNC_ENGINE is None: - connection_string = build_connection_string(db_api=SYNC_DB_API) + connection_string = build_connection_string( + db_api=SYNC_DB_API, app_name=POSTGRES_APP_NAME + "_sync" + ) _SYNC_ENGINE = create_engine( - connection_string, pool_size=40, max_overflow=10, connect_args=connect_args + connection_string, + pool_size=40, + max_overflow=10, + pool_pre_ping=POSTGRES_POOL_PRE_PING, + pool_recycle=POSTGRES_POOL_RECYCLE, ) return _SYNC_ENGINE @@ -71,9 +151,18 @@ def get_sqlalchemy_async_engine() -> AsyncEngine: connect_args = {"ssl": "disable"} global _ASYNC_ENGINE if _ASYNC_ENGINE is None: + # underlying asyncpg cannot accept application_name directly in the connection string + # https://github.com/MagicStack/asyncpg/issues/798 connection_string = build_connection_string() _ASYNC_ENGINE = create_async_engine( - connection_string, pool_size=40, max_overflow=10, connect_args=connect_args + connection_string, + connect_args={ + "server_settings": {"application_name": POSTGRES_APP_NAME + "_async"} + }, + pool_size=40, + max_overflow=10, + pool_pre_ping=POSTGRES_POOL_PRE_PING, + pool_recycle=POSTGRES_POOL_RECYCLE, ) return _ASYNC_ENGINE @@ -98,7 +187,7 @@ async def get_async_session() -> AsyncGenerator[AsyncSession, None]: async def warm_up_connections( - sync_connections_to_warm_up: int = 10, async_connections_to_warm_up: int = 10 + sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20 ) -> None: sync_postgres_engine = get_sqlalchemy_engine() connections = [ @@ -120,4 +209,8 @@ async def warm_up_connections( await async_conn.close() -SessionFactory = sessionmaker(bind=get_sqlalchemy_engine()) +def get_session_factory() -> sessionmaker[Session]: + global SessionFactory + if SessionFactory is None: + SessionFactory = sessionmaker(bind=get_sqlalchemy_engine()) + return SessionFactory diff --git a/backend/danswer/db/enums.py b/backend/danswer/db/enums.py index 2a02e078c60..eac048e10ab 100644 --- a/backend/danswer/db/enums.py +++ b/backend/danswer/db/enums.py @@ -6,6 +6,15 @@ class IndexingStatus(str, PyEnum): IN_PROGRESS = "in_progress" SUCCESS = "success" FAILED = "failed" + COMPLETED_WITH_ERRORS = "completed_with_errors" + + def is_terminal(self) -> bool: + terminal_states = { + IndexingStatus.SUCCESS, + IndexingStatus.COMPLETED_WITH_ERRORS, + IndexingStatus.FAILED, + } + return self in terminal_states # these may differ in the future, which is why we're okay with this duplication @@ -33,3 +42,12 @@ class IndexModelStatus(str, PyEnum): class ChatSessionSharedStatus(str, PyEnum): PUBLIC = "public" PRIVATE = "private" + + +class ConnectorCredentialPairStatus(str, PyEnum): + ACTIVE = "ACTIVE" + PAUSED = "PAUSED" + DELETING = "DELETING" + + def is_active(self) -> bool: + return self == ConnectorCredentialPairStatus.ACTIVE diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index bb7da0864f2..79557f209dc 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -1,22 +1,36 @@ from uuid import UUID +from fastapi import HTTPException +from sqlalchemy import and_ from sqlalchemy import asc from sqlalchemy import delete from sqlalchemy import desc +from sqlalchemy import exists +from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy.orm import aliased from sqlalchemy.orm import Session from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType from 
danswer.db.chat import get_chat_message from danswer.db.models import ChatMessageFeedback +from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document as DbDocument +from danswer.db.models import DocumentByConnectorCredentialPair from danswer.db.models import DocumentRetrievalFeedback +from danswer.db.models import User +from danswer.db.models import User__UserGroup +from danswer.db.models import UserGroup__ConnectorCredentialPair +from danswer.db.models import UserRole from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import UpdateRequest +from danswer.utils.logger import setup_logger +logger = setup_logger() -def fetch_db_doc_by_id(doc_id: str, db_session: Session) -> DbDocument: + +def _fetch_db_doc_by_id(doc_id: str, db_session: Session) -> DbDocument: stmt = select(DbDocument).where(DbDocument.id == doc_id) result = db_session.execute(stmt) doc = result.scalar_one_or_none() @@ -27,15 +41,78 @@ def fetch_db_doc_by_id(doc_id: str, db_session: Session) -> DbDocument: return doc +def _add_user_filters( + stmt: Select, user: User | None, get_editable: bool = True +) -> Select: + # If user is None, assume the user is an admin or auth is disabled + if user is None or user.role == UserRole.ADMIN: + return stmt + + DocByCC = aliased(DocumentByConnectorCredentialPair) + CCPair = aliased(ConnectorCredentialPair) + UG__CCpair = aliased(UserGroup__ConnectorCredentialPair) + User__UG = aliased(User__UserGroup) + + """ + Here we select documents by relation: + User -> User__UserGroup -> UserGroup__ConnectorCredentialPair -> + ConnectorCredentialPair -> DocumentByConnectorCredentialPair -> Document + """ + stmt = ( + stmt.outerjoin(DocByCC, DocByCC.id == DbDocument.id) + .outerjoin( + CCPair, + and_( + CCPair.connector_id == DocByCC.connector_id, + CCPair.credential_id == DocByCC.credential_id, + ), + ) + .outerjoin(UG__CCpair, UG__CCpair.cc_pair_id == CCPair.id) + .outerjoin(User__UG, User__UG.user_group_id == UG__CCpair.user_group_id) + ) + + """ + Filter Documents by: + - if the user is in the user_group that owns the object + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out objects that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all objects in the groups the user is a curator + for (as well as public objects as well) + """ + where_clause = User__UG.user_id == user.id + if user.role == UserRole.CURATOR and get_editable: + where_clause &= User__UG.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id) + where_clause &= ( + ~exists() + .where(UG__CCpair.cc_pair_id == CCPair.id) + .where(~UG__CCpair.user_group_id.in_(user_groups)) + .correlate(CCPair) + ) + else: + where_clause |= CCPair.is_public == True # noqa: E712 + + return stmt.where(where_clause) + + def fetch_docs_ranked_by_boost( - db_session: Session, ascending: bool = False, limit: int = 100 + db_session: Session, + user: User | None = None, + ascending: bool = False, + limit: int = 100, ) -> list[DbDocument]: order_func = asc if ascending else desc - stmt = ( - select(DbDocument) - .order_by(order_func(DbDocument.boost), order_func(DbDocument.semantic_id)) - .limit(limit) + stmt = select(DbDocument) + + stmt = _add_user_filters(stmt=stmt, user=user, get_editable=False) + + stmt = stmt.order_by( + order_func(DbDocument.boost), 
order_func(DbDocument.semantic_id) ) + stmt = stmt.limit(limit) result = db_session.execute(stmt) doc_list = result.scalars().all() @@ -43,12 +120,19 @@ def fetch_docs_ranked_by_boost( def update_document_boost( - db_session: Session, document_id: str, boost: int, document_index: DocumentIndex + db_session: Session, + document_id: str, + boost: int, + document_index: DocumentIndex, + user: User | None = None, ) -> None: stmt = select(DbDocument).where(DbDocument.id == document_id) + stmt = _add_user_filters(stmt, user, get_editable=True) result = db_session.execute(stmt).scalar_one_or_none() if result is None: - raise ValueError(f"No document found with ID: '{document_id}'") + raise HTTPException( + status_code=400, detail="Document is not editable by this user" + ) result.boost = boost @@ -63,12 +147,19 @@ def update_document_boost( def update_document_hidden( - db_session: Session, document_id: str, hidden: bool, document_index: DocumentIndex + db_session: Session, + document_id: str, + hidden: bool, + document_index: DocumentIndex, + user: User | None = None, ) -> None: stmt = select(DbDocument).where(DbDocument.id == document_id) + stmt = _add_user_filters(stmt, user, get_editable=True) result = db_session.execute(stmt).scalar_one_or_none() if result is None: - raise ValueError(f"No document found with ID: '{document_id}'") + raise HTTPException( + status_code=400, detail="Document is not editable by this user" + ) result.hidden = hidden @@ -92,7 +183,7 @@ def create_doc_retrieval_feedback( feedback: SearchFeedbackType | None = None, ) -> None: """Creates a new Document feedback row and updates the boost value in Postgres and Vespa""" - db_doc = fetch_db_doc_by_id(document_id, db_session) + db_doc = _fetch_db_doc_by_id(document_id, db_session) retrieval_feedback = DocumentRetrievalFeedback( chat_message_id=message_id, diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 51c41c71986..0932d500bbd 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -1,20 +1,22 @@ from collections.abc import Sequence from sqlalchemy import and_ -from sqlalchemy import ColumnElement from sqlalchemy import delete from sqlalchemy import desc from sqlalchemy import func -from sqlalchemy import or_ from sqlalchemy import select from sqlalchemy import update from sqlalchemy.orm import joinedload from sqlalchemy.orm import Session -from danswer.db.models import EmbeddingModel +from danswer.connectors.models import Document +from danswer.connectors.models import DocumentErrorSummary from danswer.db.models import IndexAttempt +from danswer.db.models import IndexAttemptError from danswer.db.models import IndexingStatus from danswer.db.models import IndexModelStatus +from danswer.db.models import SearchSettings +from danswer.server.documents.models import ConnectorCredentialPair from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.utils.logger import setup_logger from danswer.utils.telemetry import optional_telemetry @@ -23,6 +25,22 @@ logger = setup_logger() +def get_last_attempt_for_cc_pair( + cc_pair_id: int, + search_settings_id: int, + db_session: Session, +) -> IndexAttempt | None: + return ( + db_session.query(IndexAttempt) + .filter( + IndexAttempt.connector_credential_pair_id == cc_pair_id, + IndexAttempt.search_settings_id == search_settings_id, + ) + .order_by(IndexAttempt.time_updated.desc()) + .first() + ) + + def get_index_attempt( db_session: Session, index_attempt_id: int ) -> 
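One design note on the error handling above: raising `HTTPException` straight from the db helper means the routes calling `update_document_boost` / `update_document_hidden` need no extra translation, since FastAPI converts the exception into the HTTP response. A tiny illustrative sketch (the route and helper here are hypothetical, not Danswer code):

```python
from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient

app = FastAPI()


def update_boost_if_editable(document_editable: bool) -> None:
    # stand-in for the db-layer permission check above
    if not document_editable:
        raise HTTPException(status_code=400, detail="Document is not editable by this user")


@app.post("/admin/doc-boost")
def doc_boost() -> dict[str, str]:
    update_boost_if_editable(document_editable=False)
    return {"status": "ok"}


client = TestClient(app)
resp = client.post("/admin/doc-boost")
print(resp.status_code, resp.json())  # 400 {'detail': 'Document is not editable by this user'}
```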
IndexAttempt | None: @@ -31,16 +49,14 @@ def get_index_attempt( def create_index_attempt( - connector_id: int, - credential_id: int, - embedding_model_id: int, + connector_credential_pair_id: int, + search_settings_id: int, db_session: Session, from_beginning: bool = False, ) -> int: new_attempt = IndexAttempt( - connector_id=connector_id, - credential_id=credential_id, - embedding_model_id=embedding_model_id, + connector_credential_pair_id=connector_credential_pair_id, + search_settings_id=search_settings_id, from_beginning=from_beginning, status=IndexingStatus.NOT_STARTED, ) @@ -56,7 +72,9 @@ def get_inprogress_index_attempts( ) -> list[IndexAttempt]: stmt = select(IndexAttempt) if connector_id is not None: - stmt = stmt.where(IndexAttempt.connector_id == connector_id) + stmt = stmt.where( + IndexAttempt.connector_credential_pair.has(connector_id=connector_id) + ) stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS) incomplete_attempts = db_session.scalars(stmt) @@ -65,21 +83,31 @@ def get_inprogress_index_attempts( def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]: """This eagerly loads the connector and credential so that the db_session can be expired - before running long-living indexing jobs, which causes increasing memory usage""" + before running long-living indexing jobs, which causes increasing memory usage. + + Results are ordered by time_created (oldest to newest).""" stmt = select(IndexAttempt) stmt = stmt.where(IndexAttempt.status == IndexingStatus.NOT_STARTED) + stmt = stmt.order_by(IndexAttempt.time_created) stmt = stmt.options( - joinedload(IndexAttempt.connector), joinedload(IndexAttempt.credential) + joinedload(IndexAttempt.connector_credential_pair).joinedload( + ConnectorCredentialPair.connector + ), + joinedload(IndexAttempt.connector_credential_pair).joinedload( + ConnectorCredentialPair.credential + ), ) new_attempts = db_session.scalars(stmt) return list(new_attempts.all()) -def mark_attempt_in_progress__no_commit( +def mark_attempt_in_progress( index_attempt: IndexAttempt, + db_session: Session, ) -> None: index_attempt.status = IndexingStatus.IN_PROGRESS index_attempt.time_started = index_attempt.time_started or func.now() # type: ignore + db_session.commit() def mark_attempt_succeeded( @@ -91,6 +119,15 @@ def mark_attempt_succeeded( db_session.commit() +def mark_attempt_partially_succeeded( + index_attempt: IndexAttempt, + db_session: Session, +) -> None: + index_attempt.status = IndexingStatus.COMPLETED_WITH_ERRORS + db_session.add(index_attempt) + db_session.commit() + + def mark_attempt_failed( index_attempt: IndexAttempt, db_session: Session, @@ -103,7 +140,7 @@ def mark_attempt_failed( db_session.add(index_attempt) db_session.commit() - source = index_attempt.connector.source + source = index_attempt.connector_credential_pair.connector.source optional_telemetry(record_type=RecordType.FAILURE, data={"connector": source}) @@ -125,14 +162,19 @@ def update_docs_indexed( def get_last_attempt( connector_id: int, credential_id: int, - embedding_model_id: int | None, + search_settings_id: int | None, db_session: Session, ) -> IndexAttempt | None: - stmt = select(IndexAttempt).where( - IndexAttempt.connector_id == connector_id, - IndexAttempt.credential_id == credential_id, - IndexAttempt.embedding_model_id == embedding_model_id, + stmt = ( + select(IndexAttempt) + .join(ConnectorCredentialPair) + .where( + ConnectorCredentialPair.connector_id == connector_id, + ConnectorCredentialPair.credential_id == credential_id, + 
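The docstring on `get_not_started_index_attempts` explains why the cc-pair, connector, and credential are loaded eagerly before long-running work. A self-contained sketch of the chained `joinedload` pattern with toy models (names are illustrative), showing the objects remain usable after they are detached from the session:

```python
from sqlalchemy import ForeignKey, create_engine, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    joinedload,
    mapped_column,
    relationship,
)


class Base(DeclarativeBase):
    pass


class Connector(Base):
    __tablename__ = "connector"
    id: Mapped[int] = mapped_column(primary_key=True)


class CCPair(Base):
    __tablename__ = "cc_pair"
    id: Mapped[int] = mapped_column(primary_key=True)
    connector_id: Mapped[int] = mapped_column(ForeignKey("connector.id"))
    connector: Mapped[Connector] = relationship()


class Attempt(Base):
    __tablename__ = "attempt"
    id: Mapped[int] = mapped_column(primary_key=True)
    cc_pair_id: Mapped[int] = mapped_column(ForeignKey("cc_pair.id"))
    cc_pair: Mapped[CCPair] = relationship()


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Attempt(id=1, cc_pair=CCPair(id=1, connector=Connector(id=1))))
    session.commit()

    stmt = select(Attempt).options(
        joinedload(Attempt.cc_pair).joinedload(CCPair.connector)
    )
    attempt = session.scalars(stmt).one()
    session.expunge_all()  # detach everything, as if the session were expired

# still accessible because the relationship chain was eagerly loaded
print(attempt.cc_pair.connector.id)
```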
IndexAttempt.search_settings_id == search_settings_id, + ) ) + # Note, the below is using time_created instead of time_updated stmt = stmt.order_by(desc(IndexAttempt.time_created)) @@ -140,61 +182,101 @@ def get_last_attempt( def get_latest_index_attempts( - connector_credential_pair_identifiers: list[ConnectorCredentialPairIdentifier], secondary_index: bool, db_session: Session, ) -> Sequence[IndexAttempt]: ids_stmt = select( - IndexAttempt.connector_id, - IndexAttempt.credential_id, - func.max(IndexAttempt.time_created).label("max_time_created"), - ).join(EmbeddingModel, IndexAttempt.embedding_model_id == EmbeddingModel.id) + IndexAttempt.connector_credential_pair_id, + func.max(IndexAttempt.id).label("max_id"), + ).join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id) if secondary_index: - ids_stmt = ids_stmt.where(EmbeddingModel.status == IndexModelStatus.FUTURE) + ids_stmt = ids_stmt.where(SearchSettings.status == IndexModelStatus.FUTURE) else: - ids_stmt = ids_stmt.where(EmbeddingModel.status == IndexModelStatus.PRESENT) + ids_stmt = ids_stmt.where(SearchSettings.status == IndexModelStatus.PRESENT) - where_stmts: list[ColumnElement] = [] - for connector_credential_pair_identifier in connector_credential_pair_identifiers: - where_stmts.append( - and_( - IndexAttempt.connector_id - == connector_credential_pair_identifier.connector_id, - IndexAttempt.credential_id - == connector_credential_pair_identifier.credential_id, - ) - ) - if where_stmts: - ids_stmt = ids_stmt.where(or_(*where_stmts)) - ids_stmt = ids_stmt.group_by(IndexAttempt.connector_id, IndexAttempt.credential_id) - ids_subqery = ids_stmt.subquery() + ids_stmt = ids_stmt.group_by(IndexAttempt.connector_credential_pair_id) + ids_subquery = ids_stmt.subquery() stmt = ( select(IndexAttempt) .join( - ids_subqery, - and_( - ids_subqery.c.connector_id == IndexAttempt.connector_id, - ids_subqery.c.credential_id == IndexAttempt.credential_id, - ), + ids_subquery, + IndexAttempt.connector_credential_pair_id + == ids_subquery.c.connector_credential_pair_id, ) - .where(IndexAttempt.time_created == ids_subqery.c.max_time_created) + .where(IndexAttempt.id == ids_subquery.c.max_id) + ) + + return db_session.execute(stmt).scalars().all() + + +def get_index_attempts_for_connector( + db_session: Session, + connector_id: int, + only_current: bool = True, + disinclude_finished: bool = False, +) -> Sequence[IndexAttempt]: + stmt = ( + select(IndexAttempt) + .join(ConnectorCredentialPair) + .where(ConnectorCredentialPair.connector_id == connector_id) ) + if disinclude_finished: + stmt = stmt.where( + IndexAttempt.status.in_( + [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS] + ) + ) + if only_current: + stmt = stmt.join(SearchSettings).where( + SearchSettings.status == IndexModelStatus.PRESENT + ) + stmt = stmt.order_by(IndexAttempt.time_created.desc()) return db_session.execute(stmt).scalars().all() +def get_latest_finished_index_attempt_for_cc_pair( + connector_credential_pair_id: int, + secondary_index: bool, + db_session: Session, +) -> IndexAttempt | None: + stmt = select(IndexAttempt).distinct() + stmt = stmt.where( + IndexAttempt.connector_credential_pair_id == connector_credential_pair_id, + IndexAttempt.status.not_in( + [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS] + ), + ) + if secondary_index: + stmt = stmt.join(SearchSettings).where( + SearchSettings.status == IndexModelStatus.FUTURE + ) + else: + stmt = stmt.join(SearchSettings).where( + SearchSettings.status == IndexModelStatus.PRESENT + ) + 
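The rewritten `get_latest_index_attempts` is the standard greatest-row-per-group shape: take `max(id)` per cc-pair in a subquery, then join back to recover the full rows. A sketch with simplified stand-in columns:

```python
from sqlalchemy import Column, Integer, MetaData, Table, func, select

metadata = MetaData()
attempt = Table(
    "index_attempt",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("cc_pair_id", Integer),
)

# highest attempt id per cc-pair
latest_ids = (
    select(attempt.c.cc_pair_id, func.max(attempt.c.id).label("max_id"))
    .group_by(attempt.c.cc_pair_id)
    .subquery()
)

# join back to fetch the full row for each winner
stmt = select(attempt).join(
    latest_ids,
    (attempt.c.cc_pair_id == latest_ids.c.cc_pair_id)
    & (attempt.c.id == latest_ids.c.max_id),
)
print(stmt)  # one row per cc_pair_id: the attempt with the highest id
```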
stmt = stmt.order_by(desc(IndexAttempt.time_created)) + stmt = stmt.limit(1) + return db_session.execute(stmt).scalar_one_or_none() + + def get_index_attempts_for_cc_pair( db_session: Session, cc_pair_identifier: ConnectorCredentialPairIdentifier, only_current: bool = True, disinclude_finished: bool = False, ) -> Sequence[IndexAttempt]: - stmt = select(IndexAttempt).where( - and_( - IndexAttempt.connector_id == cc_pair_identifier.connector_id, - IndexAttempt.credential_id == cc_pair_identifier.credential_id, + stmt = ( + select(IndexAttempt) + .join(ConnectorCredentialPair) + .where( + and_( + ConnectorCredentialPair.connector_id == cc_pair_identifier.connector_id, + ConnectorCredentialPair.credential_id + == cc_pair_identifier.credential_id, + ) ) ) if disinclude_finished: @@ -204,8 +286,8 @@ def get_index_attempts_for_cc_pair( ) ) if only_current: - stmt = stmt.join(EmbeddingModel).where( - EmbeddingModel.status == IndexModelStatus.PRESENT + stmt = stmt.join(SearchSettings).where( + SearchSettings.status == IndexModelStatus.PRESENT ) stmt = stmt.order_by(IndexAttempt.time_created.desc()) @@ -218,26 +300,28 @@ def delete_index_attempts( db_session: Session, ) -> None: stmt = delete(IndexAttempt).where( - IndexAttempt.connector_id == connector_id, - IndexAttempt.credential_id == credential_id, + IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id, + ConnectorCredentialPair.connector_id == connector_id, + ConnectorCredentialPair.credential_id == credential_id, ) + db_session.execute(stmt) def expire_index_attempts( - embedding_model_id: int, + search_settings_id: int, db_session: Session, ) -> None: delete_query = ( delete(IndexAttempt) - .where(IndexAttempt.embedding_model_id == embedding_model_id) + .where(IndexAttempt.search_settings_id == search_settings_id) .where(IndexAttempt.status == IndexingStatus.NOT_STARTED) ) db_session.execute(delete_query) update_query = ( update(IndexAttempt) - .where(IndexAttempt.embedding_model_id == embedding_model_id) + .where(IndexAttempt.search_settings_id == search_settings_id) .where(IndexAttempt.status != IndexingStatus.SUCCESS) .values( status=IndexingStatus.FAILED, @@ -249,21 +333,22 @@ def expire_index_attempts( db_session.commit() -def cancel_indexing_attempts_for_connector( - connector_id: int, +def cancel_indexing_attempts_for_ccpair( + cc_pair_id: int, db_session: Session, include_secondary_index: bool = False, ) -> None: - stmt = delete(IndexAttempt).where( - IndexAttempt.connector_id == connector_id, - IndexAttempt.status == IndexingStatus.NOT_STARTED, + stmt = ( + delete(IndexAttempt) + .where(IndexAttempt.connector_credential_pair_id == cc_pair_id) + .where(IndexAttempt.status == IndexingStatus.NOT_STARTED) ) if not include_secondary_index: - subquery = select(EmbeddingModel.id).where( - EmbeddingModel.status != IndexModelStatus.FUTURE + subquery = select(SearchSettings.id).where( + SearchSettings.status != IndexModelStatus.FUTURE ) - stmt = stmt.where(IndexAttempt.embedding_model_id.in_(subquery)) + stmt = stmt.where(IndexAttempt.search_settings_id.in_(subquery)) db_session.execute(stmt) @@ -273,14 +358,16 @@ def cancel_indexing_attempts_for_connector( def cancel_indexing_attempts_past_model( db_session: Session, ) -> None: + """Stops all indexing attempts that are in progress or not started for + any embedding model that not present/future""" db_session.execute( update(IndexAttempt) .where( IndexAttempt.status.in_( [IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED] ), - IndexAttempt.embedding_model_id == 
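For reference on `delete_index_attempts` above: a `DELETE` whose `WHERE` clause references a second table is supported by SQLAlchemy and, on PostgreSQL, compiles to `DELETE ... USING`. A sketch with simplified stand-in tables and placeholder ids:

```python
from sqlalchemy import Column, Integer, MetaData, Table, delete
from sqlalchemy.dialects import postgresql

metadata = MetaData()
attempt = Table(
    "index_attempt", metadata, Column("id", Integer), Column("cc_pair_id", Integer)
)
cc_pair = Table(
    "connector_credential_pair",
    metadata,
    Column("id", Integer),
    Column("connector_id", Integer),
    Column("credential_id", Integer),
)

stmt = delete(attempt).where(
    attempt.c.cc_pair_id == cc_pair.c.id,
    cc_pair.c.connector_id == 1,  # placeholder ids
    cc_pair.c.credential_id == 2,
)
print(stmt.compile(dialect=postgresql.dialect()))
# DELETE FROM index_attempt USING connector_credential_pair WHERE ...
```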
EmbeddingModel.id, - EmbeddingModel.status == IndexModelStatus.PAST, + IndexAttempt.search_settings_id == SearchSettings.id, + SearchSettings.status == IndexModelStatus.PAST, ) .values(status=IndexingStatus.FAILED) ) @@ -289,16 +376,17 @@ def cancel_indexing_attempts_past_model( def count_unique_cc_pairs_with_successful_index_attempts( - embedding_model_id: int | None, + search_settings_id: int | None, db_session: Session, ) -> int: """Collect all of the Index Attempts that are successful and for the specified embedding model Then do distinct by connector_id and credential_id which is equivalent to the cc-pair. Finally, do a count to get the total number of unique cc-pairs with successful attempts""" unique_pairs_count = ( - db_session.query(IndexAttempt.connector_id, IndexAttempt.credential_id) + db_session.query(IndexAttempt.connector_credential_pair_id) + .join(ConnectorCredentialPair) .filter( - IndexAttempt.embedding_model_id == embedding_model_id, + IndexAttempt.search_settings_id == search_settings_id, IndexAttempt.status == IndexingStatus.SUCCESS, ) .distinct() @@ -306,3 +394,41 @@ def count_unique_cc_pairs_with_successful_index_attempts( ) return unique_pairs_count + + +def create_index_attempt_error( + index_attempt_id: int | None, + batch: int | None, + docs: list[Document], + exception_msg: str, + exception_traceback: str, + db_session: Session, +) -> int: + doc_summaries = [] + for doc in docs: + doc_summary = DocumentErrorSummary.from_document(doc) + doc_summaries.append(doc_summary.to_dict()) + + new_error = IndexAttemptError( + index_attempt_id=index_attempt_id, + batch=batch, + doc_summaries=doc_summaries, + error_msg=exception_msg, + traceback=exception_traceback, + ) + db_session.add(new_error) + db_session.commit() + + return new_error.id + + +def get_index_attempt_errors( + index_attempt_id: int, + db_session: Session, +) -> list[IndexAttemptError]: + stmt = select(IndexAttemptError).where( + IndexAttemptError.index_attempt_id == index_attempt_id + ) + + errors = db_session.scalars(stmt) + return list(errors.all()) diff --git a/backend/danswer/db/input_prompt.py b/backend/danswer/db/input_prompt.py new file mode 100644 index 00000000000..efa54d986a1 --- /dev/null +++ b/backend/danswer/db/input_prompt.py @@ -0,0 +1,202 @@ +from uuid import UUID + +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.db.models import InputPrompt +from danswer.db.models import User +from danswer.server.features.input_prompt.models import InputPromptSnapshot +from danswer.server.manage.models import UserInfo +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +def insert_input_prompt_if_not_exists( + user: User | None, + input_prompt_id: int | None, + prompt: str, + content: str, + active: bool, + is_public: bool, + db_session: Session, + commit: bool = True, +) -> InputPrompt: + if input_prompt_id is not None: + input_prompt = ( + db_session.query(InputPrompt).filter_by(id=input_prompt_id).first() + ) + else: + query = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt) + if user: + query = query.filter(InputPrompt.user_id == user.id) + else: + query = query.filter(InputPrompt.user_id.is_(None)) + input_prompt = query.first() + + if input_prompt is None: + input_prompt = InputPrompt( + id=input_prompt_id, + prompt=prompt, + content=content, + active=active, + is_public=is_public or user is None, + user_id=user.id if user else None, + ) + db_session.add(input_prompt) + + if commit: + 
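A hypothetical usage sketch for `create_index_attempt_error`: record each failing batch and keep indexing, which is presumably what lets an attempt end up as `COMPLETED_WITH_ERRORS` (via `mark_attempt_partially_succeeded`) instead of `FAILED`. Apart from the helper itself, every name below is illustrative:

```python
import traceback

from danswer.db.index_attempt import create_index_attempt_error


def index_batches(index_attempt_id, batches, db_session, index_batch_fn):
    """Run index_batch_fn over each batch, recording failures instead of aborting."""
    had_errors = False
    for batch_num, docs in enumerate(batches):
        try:
            index_batch_fn(docs)
        except Exception as e:
            had_errors = True
            create_index_attempt_error(
                index_attempt_id=index_attempt_id,
                batch=batch_num,
                docs=docs,
                exception_msg=str(e),
                exception_traceback=traceback.format_exc(),
                db_session=db_session,
            )
    return had_errors
```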
db_session.commit() + + return input_prompt + + +def insert_input_prompt( + prompt: str, + content: str, + is_public: bool, + user: User | None, + db_session: Session, +) -> InputPrompt: + input_prompt = InputPrompt( + prompt=prompt, + content=content, + active=True, + is_public=is_public or user is None, + user_id=user.id if user is not None else None, + ) + db_session.add(input_prompt) + db_session.commit() + + return input_prompt + + +def update_input_prompt( + user: User | None, + input_prompt_id: int, + prompt: str, + content: str, + active: bool, + db_session: Session, +) -> InputPrompt: + input_prompt = db_session.scalar( + select(InputPrompt).where(InputPrompt.id == input_prompt_id) + ) + if input_prompt is None: + raise ValueError(f"No input prompt with id {input_prompt_id}") + + if not validate_user_prompt_authorization(user, input_prompt): + raise HTTPException(status_code=401, detail="You don't own this prompt") + + input_prompt.prompt = prompt + input_prompt.content = content + input_prompt.active = active + + db_session.commit() + return input_prompt + + +def validate_user_prompt_authorization( + user: User | None, input_prompt: InputPrompt +) -> bool: + prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt) + + if prompt.user_id is not None: + if user is None: + return False + + user_details = UserInfo.from_model(user) + if str(user_details.id) != str(prompt.user_id): + return False + return True + + +def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None: + input_prompt = db_session.scalar( + select(InputPrompt).where(InputPrompt.id == input_prompt_id) + ) + + if input_prompt is None: + raise ValueError(f"No input prompt with id {input_prompt_id}") + + if not input_prompt.is_public: + raise HTTPException(status_code=400, detail="This prompt is not public") + + db_session.delete(input_prompt) + db_session.commit() + + +def remove_input_prompt( + user: User | None, input_prompt_id: int, db_session: Session +) -> None: + input_prompt = db_session.scalar( + select(InputPrompt).where(InputPrompt.id == input_prompt_id) + ) + if input_prompt is None: + raise ValueError(f"No input prompt with id {input_prompt_id}") + + if input_prompt.is_public: + raise HTTPException( + status_code=400, detail="Cannot delete public prompts with this method" + ) + + if not validate_user_prompt_authorization(user, input_prompt): + raise HTTPException(status_code=401, detail="You do not own this prompt") + + db_session.delete(input_prompt) + db_session.commit() + + +def fetch_input_prompt_by_id( + id: int, user_id: UUID | None, db_session: Session +) -> InputPrompt: + query = select(InputPrompt).where(InputPrompt.id == id) + + if user_id: + query = query.where( + (InputPrompt.user_id == user_id) | (InputPrompt.user_id is None) + ) + else: + # If no user_id is provided, only fetch prompts without a user_id (aka public) + query = query.where(InputPrompt.user_id == None) # noqa + + result = db_session.scalar(query) + + if result is None: + raise HTTPException(422, "No input prompt found") + + return result + + +def fetch_public_input_prompts( + db_session: Session, +) -> list[InputPrompt]: + query = select(InputPrompt).where(InputPrompt.is_public) + return list(db_session.scalars(query).all()) + + +def fetch_input_prompts_by_user( + db_session: Session, + user_id: UUID | None, + active: bool | None = None, + include_public: bool = False, +) -> list[InputPrompt]: + query = select(InputPrompt) + + if user_id is not None: + if include_public: + query = query.where( + 
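Since several filters in this module compare `user_id` against `None`, it is worth spelling out how the three spellings behave in SQLAlchemy (illustrative table below):

```python
from sqlalchemy import Column, Integer, MetaData, Table

t = Table("input_prompt", MetaData(), Column("user_id", Integer))

print(t.c.user_id == None)    # SQL expression: "input_prompt.user_id IS NULL" (== is overloaded)
print(t.c.user_id.is_(None))  # same SQL, without needing a linter suppression
print(t.c.user_id is None)    # plain Python identity check: just False, never a SQL expression
```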
(InputPrompt.user_id == user_id) | InputPrompt.is_public + ) + else: + query = query.where(InputPrompt.user_id == user_id) + + elif include_public: + query = query.where(InputPrompt.is_public) + + if active is not None: + query = query.where(InputPrompt.active == active) + + return list(db_session.scalars(query).all()) diff --git a/backend/danswer/db/llm.py b/backend/danswer/db/llm.py index f969dbf6864..152cb130573 100644 --- a/backend/danswer/db/llm.py +++ b/backend/danswer/db/llm.py @@ -1,10 +1,60 @@ from sqlalchemy import delete +from sqlalchemy import or_ from sqlalchemy import select from sqlalchemy.orm import Session +from danswer.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel from danswer.db.models import LLMProvider as LLMProviderModel +from danswer.db.models import LLMProvider__UserGroup +from danswer.db.models import User +from danswer.db.models import User__UserGroup +from danswer.server.manage.embedding.models import CloudEmbeddingProvider +from danswer.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest from danswer.server.manage.llm.models import FullLLMProvider from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from shared_configs.enums import EmbeddingProvider + + +def update_group_llm_provider_relationships__no_commit( + llm_provider_id: int, + group_ids: list[int] | None, + db_session: Session, +) -> None: + # Delete existing relationships + db_session.query(LLMProvider__UserGroup).filter( + LLMProvider__UserGroup.llm_provider_id == llm_provider_id + ).delete(synchronize_session="fetch") + + # Add new relationships from given group_ids + if group_ids: + new_relationships = [ + LLMProvider__UserGroup( + llm_provider_id=llm_provider_id, + user_group_id=group_id, + ) + for group_id in group_ids + ] + db_session.add_all(new_relationships) + + +def upsert_cloud_embedding_provider( + db_session: Session, provider: CloudEmbeddingProviderCreationRequest +) -> CloudEmbeddingProvider: + existing_provider = ( + db_session.query(CloudEmbeddingProviderModel) + .filter_by(provider_type=provider.provider_type) + .first() + ) + if existing_provider: + for key, value in provider.model_dump().items(): + setattr(existing_provider, key, value) + else: + new_provider = CloudEmbeddingProviderModel(**provider.model_dump()) + db_session.add(new_provider) + existing_provider = new_provider + db_session.commit() + db_session.refresh(existing_provider) + return CloudEmbeddingProvider.from_request(existing_provider) def upsert_llm_provider( @@ -13,41 +63,75 @@ def upsert_llm_provider( existing_llm_provider = db_session.scalar( select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name) ) - if existing_llm_provider: - existing_llm_provider.provider = llm_provider.provider - existing_llm_provider.api_key = llm_provider.api_key - existing_llm_provider.api_base = llm_provider.api_base - existing_llm_provider.api_version = llm_provider.api_version - existing_llm_provider.custom_config = llm_provider.custom_config - existing_llm_provider.default_model_name = llm_provider.default_model_name - existing_llm_provider.fast_default_model_name = ( - llm_provider.fast_default_model_name - ) - existing_llm_provider.model_names = llm_provider.model_names - db_session.commit() - return FullLLMProvider.from_model(existing_llm_provider) - - # if it does not exist, create a new entry - llm_provider_model = LLMProviderModel( - name=llm_provider.name, - provider=llm_provider.provider, - api_key=llm_provider.api_key, - 
api_base=llm_provider.api_base, - api_version=llm_provider.api_version, - custom_config=llm_provider.custom_config, - default_model_name=llm_provider.default_model_name, - fast_default_model_name=llm_provider.fast_default_model_name, - model_names=llm_provider.model_names, - is_default_provider=None, + + if not existing_llm_provider: + existing_llm_provider = LLMProviderModel(name=llm_provider.name) + db_session.add(existing_llm_provider) + + existing_llm_provider.provider = llm_provider.provider + existing_llm_provider.api_key = llm_provider.api_key + existing_llm_provider.api_base = llm_provider.api_base + existing_llm_provider.api_version = llm_provider.api_version + existing_llm_provider.custom_config = llm_provider.custom_config + existing_llm_provider.default_model_name = llm_provider.default_model_name + existing_llm_provider.fast_default_model_name = llm_provider.fast_default_model_name + existing_llm_provider.model_names = llm_provider.model_names + existing_llm_provider.is_public = llm_provider.is_public + existing_llm_provider.display_model_names = llm_provider.display_model_names + + if not existing_llm_provider.id: + # If its not already in the db, we need to generate an ID by flushing + db_session.flush() + + # Make sure the relationship table stays up to date + update_group_llm_provider_relationships__no_commit( + llm_provider_id=existing_llm_provider.id, + group_ids=llm_provider.groups, + db_session=db_session, ) - db_session.add(llm_provider_model) + db_session.commit() - return FullLLMProvider.from_model(llm_provider_model) + return FullLLMProvider.from_model(existing_llm_provider) + + +def fetch_existing_embedding_providers( + db_session: Session, +) -> list[CloudEmbeddingProviderModel]: + return list(db_session.scalars(select(CloudEmbeddingProviderModel)).all()) + + +def fetch_existing_llm_providers( + db_session: Session, + user: User | None = None, +) -> list[LLMProviderModel]: + if not user: + return list(db_session.scalars(select(LLMProviderModel)).all()) + stmt = select(LLMProviderModel).distinct() + user_groups_select = select(User__UserGroup.user_group_id).where( + User__UserGroup.user_id == user.id + ) + access_conditions = or_( + LLMProviderModel.is_public, + LLMProviderModel.id.in_( # User is part of a group that has access + select(LLMProvider__UserGroup.llm_provider_id).where( + LLMProvider__UserGroup.user_group_id.in_(user_groups_select) # type: ignore + ) + ), + ) + stmt = stmt.where(access_conditions) + + return list(db_session.scalars(stmt).all()) -def fetch_existing_llm_providers(db_session: Session) -> list[LLMProviderModel]: - return list(db_session.scalars(select(LLMProviderModel)).all()) +def fetch_embedding_provider( + db_session: Session, provider_type: EmbeddingProvider +) -> CloudEmbeddingProviderModel | None: + return db_session.scalar( + select(CloudEmbeddingProviderModel).where( + CloudEmbeddingProviderModel.provider_type == provider_type + ) + ) def fetch_default_provider(db_session: Session) -> FullLLMProvider | None: @@ -70,7 +154,24 @@ def fetch_provider(db_session: Session, provider_name: str) -> FullLLMProvider | return FullLLMProvider.from_model(provider_model) +def remove_embedding_provider( + db_session: Session, provider_type: EmbeddingProvider +) -> None: + db_session.execute( + delete(CloudEmbeddingProviderModel).where( + CloudEmbeddingProviderModel.provider_type == provider_type + ) + ) + + def remove_llm_provider(db_session: Session, provider_id: int) -> None: + # Remove LLMProvider's dependent relationships + 
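On the flush comment above: an autoincrement primary key is only assigned once the row is actually sent to the database, and `flush()` does that inside the open transaction without committing, so the new id can be used for the `llm_provider__user_group` rows. A toy demonstration:

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Provider(Base):
    __tablename__ = "provider"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str]


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    p = Provider(name="my-provider")
    session.add(p)
    print(p.id)      # None: the INSERT has not been emitted yet
    session.flush()  # emits the INSERT inside the open transaction
    print(p.id)      # 1: id is now available for relationship rows
    session.commit()
```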
db_session.execute( + delete(LLMProvider__UserGroup).where( + LLMProvider__UserGroup.llm_provider_id == provider_id + ) + ) + # Remove LLMProvider db_session.execute( delete(LLMProviderModel).where(LLMProviderModel.id == provider_id) ) diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index f7d16743c65..3cdec323961 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -5,12 +5,13 @@ from typing import Literal from typing import NotRequired from typing import Optional -from typing import TypedDict +from typing_extensions import TypedDict # noreorder from uuid import UUID from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID +from fastapi_users_db_sqlalchemy.generics import TIMESTAMPAware from sqlalchemy import Boolean from sqlalchemy import DateTime from sqlalchemy import Enum @@ -33,14 +34,17 @@ from sqlalchemy.types import TypeDecorator from danswer.auth.schemas import UserRole +from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS from danswer.configs.constants import DEFAULT_BOOST from danswer.configs.constants import DocumentSource from danswer.configs.constants import FileOrigin from danswer.configs.constants import MessageType +from danswer.configs.constants import NotificationType from danswer.configs.constants import SearchFeedbackType from danswer.configs.constants import TokenRateLimitScope from danswer.connectors.models import InputType from danswer.db.enums import ChatSessionSharedStatus +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.enums import IndexingStatus from danswer.db.enums import IndexModelStatus from danswer.db.enums import TaskStatus @@ -50,9 +54,10 @@ from danswer.llm.override_models import LLMOverride from danswer.llm.override_models import PromptOverride from danswer.search.enums import RecencyBiasSetting -from danswer.search.enums import SearchType from danswer.utils.encryption import decrypt_bytes_to_string from danswer.utils.encryption import encrypt_string_to_bytes +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import RerankerProvider class Base(DeclarativeBase): @@ -117,9 +122,17 @@ class User(SQLAlchemyBaseUserTableUUID, Base): # if specified, controls the assistants that are shown to the user + their order # if not specified, all assistants are shown chosen_assistants: Mapped[list[int]] = mapped_column( - postgresql.ARRAY(Integer), nullable=True + postgresql.JSONB(), nullable=True + ) + + oidc_expiry: Mapped[datetime.datetime] = mapped_column( + TIMESTAMPAware(timezone=True), nullable=True ) + default_model: Mapped[str] = mapped_column(Text, nullable=True) + # organized in typical structured fashion + # formatted as `displayName__provider__modelName` + # relationships credentials: Mapped[list["Credential"]] = relationship( "Credential", back_populates="user", lazy="joined" @@ -130,11 +143,43 @@ class User(SQLAlchemyBaseUserTableUUID, Base): chat_folders: Mapped[list["ChatFolder"]] = relationship( "ChatFolder", back_populates="user" ) + prompts: Mapped[list["Prompt"]] = relationship("Prompt", back_populates="user") + input_prompts: Mapped[list["InputPrompt"]] = relationship( + "InputPrompt", back_populates="user" + ) + # Personas owned by this user personas: Mapped[list["Persona"]] = relationship("Persona", back_populates="user") # Custom tools created by this 
user custom_tools: Mapped[list["Tool"]] = relationship("Tool", back_populates="user") + # Notifications for the UI + notifications: Mapped[list["Notification"]] = relationship( + "Notification", back_populates="user" + ) + + +class InputPrompt(Base): + __tablename__ = "inputprompt" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + prompt: Mapped[str] = mapped_column(String) + content: Mapped[str] = mapped_column(String) + active: Mapped[bool] = mapped_column(Boolean) + user: Mapped[User | None] = relationship("User", back_populates="input_prompts") + is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + + +class InputPrompt__User(Base): + __tablename__ = "inputprompt__user" + + input_prompt_id: Mapped[int] = mapped_column( + ForeignKey("inputprompt.id"), primary_key=True + ) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("inputprompt.id"), primary_key=True + ) class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base): @@ -156,6 +201,24 @@ class ApiKey(Base): DateTime(timezone=True), server_default=func.now() ) + # Add this relationship to access the User object via user_id + user: Mapped["User"] = relationship("User", foreign_keys=[user_id]) + + +class Notification(Base): + __tablename__ = "notification" + + id: Mapped[int] = mapped_column(primary_key=True) + notif_type: Mapped[NotificationType] = mapped_column( + Enum(NotificationType, native_enum=False) + ) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + dismissed: Mapped[bool] = mapped_column(Boolean, default=False) + last_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) + first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) + + user: Mapped[User] = relationship("User", back_populates="notifications") + """ Association Tables @@ -183,7 +246,9 @@ class Persona__User(Base): __tablename__ = "persona__user" persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) - user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id"), primary_key=True, nullable=True + ) class DocumentSet__User(Base): @@ -192,7 +257,9 @@ class DocumentSet__User(Base): document_set_id: Mapped[int] = mapped_column( ForeignKey("document_set.id"), primary_key=True ) - user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id"), primary_key=True, nullable=True + ) class DocumentSet__ConnectorCredentialPair(Base): @@ -300,6 +367,9 @@ class ConnectorCredentialPair(Base): nullable=False, ) name: Mapped[str] = mapped_column(String, nullable=False) + status: Mapped[ConnectorCredentialPairStatus] = mapped_column( + Enum(ConnectorCredentialPairStatus, native_enum=False), nullable=False + ) connector_id: Mapped[int] = mapped_column( ForeignKey("connector.id"), primary_key=True ) @@ -336,6 +406,9 @@ class ConnectorCredentialPair(Base): back_populates="connector_credential_pairs", overlaps="document_set", ) + index_attempts: Mapped[list["IndexAttempt"]] = relationship( + "IndexAttempt", back_populates="connector_credential_pair" + ) class Document(Base): @@ -415,6 +488,9 @@ class Connector(Base): connector_specific_config: Mapped[dict[str, Any]] = mapped_column( postgresql.JSONB() ) + indexing_start: Mapped[datetime.datetime | 
None] = mapped_column( + DateTime, nullable=True + ) refresh_freq: Mapped[int | None] = mapped_column(Integer, nullable=True) prune_freq: Mapped[int | None] = mapped_column(Integer, nullable=True) time_created: Mapped[datetime.datetime] = mapped_column( @@ -423,7 +499,6 @@ class Connector(Base): time_updated: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), onupdate=func.now() ) - disabled: Mapped[bool] = mapped_column(Boolean, default=False) credentials: Mapped[list["ConnectorCredentialPair"]] = relationship( "ConnectorCredentialPair", @@ -433,14 +508,17 @@ class Connector(Base): documents_by_connector: Mapped[ list["DocumentByConnectorCredentialPair"] ] = relationship("DocumentByConnectorCredentialPair", back_populates="connector") - index_attempts: Mapped[list["IndexAttempt"]] = relationship( - "IndexAttempt", back_populates="connector" - ) class Credential(Base): __tablename__ = "credential" + name: Mapped[str] = mapped_column(String, nullable=True) + + source: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) + id: Mapped[int] = mapped_column(primary_key=True) credential_json: Mapped[dict[str, Any]] = mapped_column(EncryptedJson()) user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) @@ -453,6 +531,8 @@ class Credential(Base): DateTime(timezone=True), server_default=func.now(), onupdate=func.now() ) + curator_public: Mapped[bool] = mapped_column(Boolean, default=False) + connectors: Mapped[list["ConnectorCredentialPair"]] = relationship( "ConnectorCredentialPair", back_populates="credential", @@ -461,28 +541,51 @@ class Credential(Base): documents_by_credential: Mapped[ list["DocumentByConnectorCredentialPair"] ] = relationship("DocumentByConnectorCredentialPair", back_populates="credential") - index_attempts: Mapped[list["IndexAttempt"]] = relationship( - "IndexAttempt", back_populates="credential" - ) + user: Mapped[User | None] = relationship("User", back_populates="credentials") -class EmbeddingModel(Base): - __tablename__ = "embedding_model" - # ID is used also to indicate the order that the models are configured by the admin +class SearchSettings(Base): + __tablename__ = "search_settings" + id: Mapped[int] = mapped_column(primary_key=True) model_name: Mapped[str] = mapped_column(String) model_dim: Mapped[int] = mapped_column(Integer) normalize: Mapped[bool] = mapped_column(Boolean) - query_prefix: Mapped[str] = mapped_column(String) - passage_prefix: Mapped[str] = mapped_column(String) + query_prefix: Mapped[str | None] = mapped_column(String, nullable=True) + passage_prefix: Mapped[str | None] = mapped_column(String, nullable=True) status: Mapped[IndexModelStatus] = mapped_column( Enum(IndexModelStatus, native_enum=False) ) index_name: Mapped[str] = mapped_column(String) + provider_type: Mapped[EmbeddingProvider | None] = mapped_column( + ForeignKey("embedding_provider.provider_type"), nullable=True + ) + + # Mini and Large Chunks (large chunk also checks for model max context) + multipass_indexing: Mapped[bool] = mapped_column(Boolean, default=True) + + multilingual_expansion: Mapped[list[str]] = mapped_column( + postgresql.ARRAY(String), default=[] + ) + + # Reranking settings + disable_rerank_for_streaming: Mapped[bool] = mapped_column(Boolean, default=False) + rerank_model_name: Mapped[str | None] = mapped_column(String, nullable=True) + rerank_provider_type: Mapped[RerankerProvider | None] = mapped_column( + Enum(RerankerProvider, native_enum=False), 
nullable=True + ) + rerank_api_key: Mapped[str | None] = mapped_column(String, nullable=True) + num_rerank: Mapped[int] = mapped_column(Integer, default=NUM_POSTPROCESSED_RESULTS) + + cloud_provider: Mapped["CloudEmbeddingProvider"] = relationship( + "CloudEmbeddingProvider", + back_populates="search_settings", + foreign_keys=[provider_type], + ) index_attempts: Mapped[list["IndexAttempt"]] = relationship( - "IndexAttempt", back_populates="embedding_model" + "IndexAttempt", back_populates="search_settings" ) __table_args__ = ( @@ -500,6 +603,14 @@ class EmbeddingModel(Base): ), ) + def __repr__(self) -> str: + return f"" + + @property + def api_key(self) -> str | None: + return self.cloud_provider.api_key if self.cloud_provider is not None else None + class IndexAttempt(Base): """ @@ -511,14 +622,12 @@ class IndexAttempt(Base): __tablename__ = "index_attempt" id: Mapped[int] = mapped_column(primary_key=True) - connector_id: Mapped[int | None] = mapped_column( - ForeignKey("connector.id"), - nullable=True, - ) - credential_id: Mapped[int | None] = mapped_column( - ForeignKey("credential.id"), - nullable=True, + + connector_credential_pair_id: Mapped[int] = mapped_column( + ForeignKey("connector_credential_pair.id"), + nullable=False, ) + # Some index attempts that run from beginning will still have this as False # This is only for attempts that are explicitly marked as from the start via # the run once API @@ -535,8 +644,8 @@ class IndexAttempt(Base): # only filled if status = "failed" AND an unhandled exception caused the failure full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None) # Nullable because in the past, we didn't allow swapping out embedding models live - embedding_model_id: Mapped[int] = mapped_column( - ForeignKey("embedding_model.id"), + search_settings_id: Mapped[int] = mapped_column( + ForeignKey("search_settings.id"), nullable=False, ) time_created: Mapped[datetime.datetime] = mapped_column( @@ -554,21 +663,20 @@ class IndexAttempt(Base): onupdate=func.now(), ) - connector: Mapped[Connector] = relationship( - "Connector", back_populates="index_attempts" + connector_credential_pair: Mapped[ConnectorCredentialPair] = relationship( + "ConnectorCredentialPair", back_populates="index_attempts" ) - credential: Mapped[Credential] = relationship( - "Credential", back_populates="index_attempts" - ) - embedding_model: Mapped[EmbeddingModel] = relationship( - "EmbeddingModel", back_populates="index_attempts" + + search_settings: Mapped[SearchSettings] = relationship( + "SearchSettings", back_populates="index_attempts" ) + error_rows = relationship("IndexAttemptError", back_populates="index_attempt") + __table_args__ = ( Index( "ix_index_attempt_latest_for_connector_credential_pair", - "connector_id", - "credential_id", + "connector_credential_pair_id", "time_created", ), ) @@ -576,13 +684,59 @@ class IndexAttempt(Base): def __repr__(self) -> str: return ( f"" f"time_created={self.time_created!r}, " f"time_updated={self.time_updated!r}, " ) + def is_finished(self) -> bool: + return self.status.is_terminal() + + +class IndexAttemptError(Base): + """ + Represents an error that was encountered during an IndexAttempt. + """ + + __tablename__ = "index_attempt_errors" + + id: Mapped[int] = mapped_column(primary_key=True) + + index_attempt_id: Mapped[int] = mapped_column( + ForeignKey("index_attempt.id"), + nullable=True, + ) + + # The index of the batch where the error occurred (if looping thru batches) + # Just informational. 
+ batch: Mapped[int | None] = mapped_column(Integer, default=None) + doc_summaries: Mapped[list[Any]] = mapped_column(postgresql.JSONB()) + error_msg: Mapped[str | None] = mapped_column(Text, default=None) + traceback: Mapped[str | None] = mapped_column(Text, default=None) + time_created: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), + server_default=func.now(), + ) + + # This is the reverse side of the relationship + index_attempt = relationship("IndexAttempt", back_populates="error_rows") + + __table_args__ = ( + Index( + "index_attempt_id", + "time_created", + ), + ) + + def __repr__(self) -> str: + return ( + f"" + f"time_created={self.time_created!r}, " + ) + class DocumentByConnectorCredentialPair(Base): """Represents an indexing of a document by a specific connector / credential pair""" @@ -647,6 +801,9 @@ class SearchDoc(Base): ) is_internet: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True) + is_relevant: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + relevance_explanation: Mapped[str | None] = mapped_column(String, nullable=True) + chat_messages = relationship( "ChatMessage", secondary="chat_message__search_doc", @@ -751,6 +908,7 @@ class ChatMessage(Base): Integer, ForeignKey("persona.id"), nullable=True ) + overridden_model: Mapped[str | None] = mapped_column(String, nullable=True) parent_message: Mapped[int | None] = mapped_column(Integer, nullable=True) latest_child_message: Mapped[int | None] = mapped_column(Integer, nullable=True) message: Mapped[str] = mapped_column(Text) @@ -794,6 +952,8 @@ class ChatMessage(Base): secondary="chat_message__search_doc", back_populates="chat_messages", ) + # NOTE: Should always be attached to the `assistant` message. + # represents the tool calls used to generate this message tool_calls: Mapped[list["ToolCall"]] = relationship( "ToolCall", back_populates="message", @@ -879,11 +1039,6 @@ class ChatMessageFeedback(Base): ) -""" -Structures, Organizational, Configurations Tables -""" - - class LLMProvider(Base): __tablename__ = "llm_provider" @@ -901,6 +1056,11 @@ class LLMProvider(Base): default_model_name: Mapped[str] = mapped_column(String) fast_default_model_name: Mapped[str | None] = mapped_column(String, nullable=True) + # Models to actually disp;aly to users + # If nulled out, we assume in the application logic we should present all + display_model_names: Mapped[list[str] | None] = mapped_column( + postgresql.ARRAY(String), nullable=True + ) # The LLMs that are available for this provider. Only required if not a default provider. # If a default provider, then the LLM options are pulled from the `options.py` file. # If needed, can be pulled out as a separate table in the future. 
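For the `__table_args__` entries above, note that the first positional argument to `Index()` is the index name; the remaining arguments are the indexed columns. A quick illustrative check:

```python
from sqlalchemy import Column, DateTime, Index, Integer, MetaData, Table

metadata = MetaData()
t = Table(
    "index_attempt_errors",
    metadata,
    Column("index_attempt_id", Integer),
    Column("time_created", DateTime),
)

ix = Index("ix_index_attempt_errors_attempt_time", t.c.index_attempt_id, t.c.time_created)
print(ix.name, [c.name for c in ix.columns])
# ix_index_attempt_errors_attempt_time ['index_attempt_id', 'time_created']
```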
@@ -910,6 +1070,30 @@ class LLMProvider(Base): # should only be set for a single provider is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True) + # EE only + is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + groups: Mapped[list["UserGroup"]] = relationship( + "UserGroup", + secondary="llm_provider__user_group", + viewonly=True, + ) + + +class CloudEmbeddingProvider(Base): + __tablename__ = "embedding_provider" + + provider_type: Mapped[EmbeddingProvider] = mapped_column( + Enum(EmbeddingProvider), primary_key=True + ) + api_key: Mapped[str | None] = mapped_column(EncryptedString()) + search_settings: Mapped[list["SearchSettings"]] = relationship( + "SearchSettings", + back_populates="cloud_provider", + foreign_keys="SearchSettings.provider_type", + ) + + def __repr__(self) -> str: + return f"" class DocumentSet(Base): @@ -1026,12 +1210,10 @@ class Persona(Base): user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) name: Mapped[str] = mapped_column(String) description: Mapped[str] = mapped_column(String) - # Currently stored but unused, all flows use hybrid - search_type: Mapped[SearchType] = mapped_column( - Enum(SearchType, native_enum=False), default=SearchType.HYBRID - ) # Number of chunks to pass to the LLM for generation. num_chunks: Mapped[float | None] = mapped_column(Float, nullable=True) + chunks_above: Mapped[int] = mapped_column(Integer) + chunks_below: Mapped[int] = mapped_column(Integer) # Pass every chunk through LLM for evaluation, fairly expensive # Can be turned off globally by admin, in which case, this setting is ignored llm_relevance_filter: Mapped[bool] = mapped_column(Boolean) @@ -1062,9 +1244,14 @@ class Persona(Base): # controls the ordering of personas in the UI # higher priority personas are displayed first, ties are resolved by the ID, # where lower value IDs (e.g. 
created earlier) are displayed first - display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=None) + display_priority: Mapped[int | None] = mapped_column( + Integer, nullable=True, default=None + ) deleted: Mapped[bool] = mapped_column(Boolean, default=False) - is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + + uploaded_image_id: Mapped[str | None] = mapped_column(String, nullable=True) + icon_color: Mapped[str | None] = mapped_column(String, nullable=True) + icon_shape: Mapped[int | None] = mapped_column(Integer, nullable=True) # These are only defaults, users can select from all if desired prompts: Mapped[list[Prompt]] = relationship( @@ -1092,6 +1279,7 @@ class Persona(Base): viewonly=True, ) # EE only + is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) groups: Mapped[list["UserGroup"]] = relationship( "UserGroup", secondary="persona__user_group", @@ -1121,8 +1309,7 @@ class ChannelConfig(TypedDict): channel_names: list[str] respond_tag_only: NotRequired[bool] # defaults to False respond_to_bots: NotRequired[bool] # defaults to False - respond_team_member_list: NotRequired[list[str]] - respond_slack_group_list: NotRequired[list[str]] + respond_member_group_list: NotRequired[list[str]] answer_filters: NotRequired[list[AllowedAnswerFilters]] # If None then no follow up # If empty list, follow up with no tags @@ -1195,6 +1382,7 @@ class SlackBotConfig(Base): response_type: Mapped[SlackBotResponseType] = mapped_column( Enum(SlackBotResponseType, native_enum=False), nullable=False ) + enable_auto_filters: Mapped[bool] = mapped_column( Boolean, nullable=False, default=False ) @@ -1275,10 +1463,14 @@ class SamlAccount(Base): class User__UserGroup(Base): __tablename__ = "user__user_group" + is_curator: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + user_group_id: Mapped[int] = mapped_column( ForeignKey("user_group.id"), primary_key=True ) - user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id"), primary_key=True, nullable=True + ) class UserGroup__ConnectorCredentialPair(Base): @@ -1315,6 +1507,17 @@ class Persona__UserGroup(Base): ) +class LLMProvider__UserGroup(Base): + __tablename__ = "llm_provider__user_group" + + llm_provider_id: Mapped[int] = mapped_column( + ForeignKey("llm_provider.id"), primary_key=True + ) + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + + class DocumentSet__UserGroup(Base): __tablename__ = "document_set__user_group" @@ -1326,6 +1529,17 @@ class DocumentSet__UserGroup(Base): ) +class Credential__UserGroup(Base): + __tablename__ = "credential__user_group" + + credential_id: Mapped[int] = mapped_column( + ForeignKey("credential.id"), primary_key=True + ) + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + + class UserGroup(Base): __tablename__ = "user_group" @@ -1342,6 +1556,10 @@ class UserGroup(Base): "User", secondary=User__UserGroup.__table__, ) + user_group_relationships: Mapped[list[User__UserGroup]] = relationship( + "User__UserGroup", + viewonly=True, + ) cc_pairs: Mapped[list[ConnectorCredentialPair]] = relationship( "ConnectorCredentialPair", secondary=UserGroup__ConnectorCredentialPair.__table__, @@ -1363,6 +1581,10 @@ class UserGroup(Base): secondary=DocumentSet__UserGroup.__table__, viewonly=True, ) + credentials: Mapped[list[Credential]] = relationship( + 
"Credential", + secondary=Credential__UserGroup.__table__, + ) """Tables related to Token Rate Limiting diff --git a/backend/danswer/db/notification.py b/backend/danswer/db/notification.py new file mode 100644 index 00000000000..61586208c69 --- /dev/null +++ b/backend/danswer/db/notification.py @@ -0,0 +1,76 @@ +from sqlalchemy import select +from sqlalchemy.orm import Session +from sqlalchemy.sql import func + +from danswer.configs.constants import NotificationType +from danswer.db.models import Notification +from danswer.db.models import User + + +def create_notification( + user: User | None, + notif_type: NotificationType, + db_session: Session, +) -> Notification: + notification = Notification( + user_id=user.id if user else None, + notif_type=notif_type, + dismissed=False, + last_shown=func.now(), + first_shown=func.now(), + ) + db_session.add(notification) + db_session.commit() + return notification + + +def get_notification_by_id( + notification_id: int, user: User | None, db_session: Session +) -> Notification: + user_id = user.id if user else None + notif = db_session.get(Notification, notification_id) + if not notif: + raise ValueError(f"No notification found with id {notification_id}") + if notif.user_id != user_id: + raise PermissionError( + f"User {user_id} is not authorized to access notification {notification_id}" + ) + return notif + + +def get_notifications( + user: User | None, + db_session: Session, + notif_type: NotificationType | None = None, + include_dismissed: bool = True, +) -> list[Notification]: + query = select(Notification).where( + Notification.user_id == user.id if user else Notification.user_id.is_(None) + ) + if not include_dismissed: + query = query.where(Notification.dismissed.is_(False)) + if notif_type: + query = query.where(Notification.notif_type == notif_type) + return list(db_session.execute(query).scalars().all()) + + +def dismiss_all_notifications( + notif_type: NotificationType, + db_session: Session, +) -> None: + db_session.query(Notification).filter(Notification.notif_type == notif_type).update( + {"dismissed": True} + ) + db_session.commit() + + +def dismiss_notification(notification: Notification, db_session: Session) -> None: + notification.dismissed = True + db_session.commit() + + +def update_notification_last_shown( + notification: Notification, db_session: Session +) -> None: + notification.last_shown = func.now() + db_session.commit() diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index 872bca89b0f..bbf45a1d9ad 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -4,15 +4,21 @@ from fastapi import HTTPException from sqlalchemy import delete +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import not_ from sqlalchemy import or_ +from sqlalchemy import Select from sqlalchemy import select from sqlalchemy import update +from sqlalchemy.orm import aliased +from sqlalchemy.orm import joinedload from sqlalchemy.orm import Session from danswer.auth.schemas import UserRole from danswer.configs.chat_configs import BING_API_KEY +from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE +from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW from danswer.db.constants import SLACK_BOT_PERSONA_PREFIX from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import DocumentSet @@ -24,6 +30,7 @@ from danswer.db.models import Tool from danswer.db.models import User from danswer.db.models import User__UserGroup +from danswer.db.models import 
UserGroup from danswer.search.enums import RecencyBiasSetting from danswer.server.features.persona.models import CreatePersonaRequest from danswer.server.features.persona.models import PersonaSnapshot @@ -33,6 +40,89 @@ logger = setup_logger() +def _add_user_filters( + stmt: Select, user: User | None, get_editable: bool = True +) -> Select: + # If user is None, assume the user is an admin or auth is disabled + if user is None or user.role == UserRole.ADMIN: + return stmt + + Persona__UG = aliased(Persona__UserGroup) + User__UG = aliased(User__UserGroup) + """ + Here we select cc_pairs by relation: + User -> User__UserGroup -> Persona__UserGroup -> Persona + """ + stmt = ( + stmt.outerjoin(Persona__UG) + .outerjoin( + User__UserGroup, + User__UserGroup.user_group_id == Persona__UG.user_group_id, + ) + .outerjoin( + Persona__User, + Persona__User.persona_id == Persona.id, + ) + ) + """ + Filter Personas by: + - if the user is in the user_group that owns the Persona + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out Personas that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all Personas in the groups the user is a curator + for (as well as public Personas) + - if we are not editing, we return all Personas directly connected to the user + """ + where_clause = User__UserGroup.user_id == user.id + if user.role == UserRole.CURATOR and get_editable: + where_clause &= User__UserGroup.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id) + if user.role == UserRole.CURATOR: + user_groups = user_groups.where(User__UG.is_curator == True) # noqa: E712 + where_clause &= ( + ~exists() + .where(Persona__UG.persona_id == Persona.id) + .where(~Persona__UG.user_group_id.in_(user_groups)) + .correlate(Persona) + ) + else: + where_clause |= Persona.is_public == True # noqa: E712 + where_clause &= Persona.is_visible == True # noqa: E712 + where_clause |= Persona__User.user_id == user.id + where_clause |= Persona.user_id == user.id + + return stmt.where(where_clause) + + +def fetch_persona_by_id( + db_session: Session, persona_id: int, user: User | None, get_editable: bool = True +) -> Persona: + stmt = select(Persona).where(Persona.id == persona_id).distinct() + stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable) + persona = db_session.scalars(stmt).one_or_none() + if not persona: + raise HTTPException( + status_code=403, + detail=f"Persona with ID {persona_id} does not exist or user is not authorized to access it", + ) + return persona + + +def _get_persona_by_name( + persona_name: str, user: User | None, db_session: Session +) -> Persona | None: + """Admins can see all, regular users can only fetch their own. 
+ If user is None, assume the user is an admin or auth is disabled.""" + stmt = select(Persona).where(Persona.name == persona_name) + if user and user.role != UserRole.ADMIN: + stmt = stmt.where(Persona.user_id == user.id) + result = db_session.execute(stmt).scalar_one_or_none() + return result + + def make_persona_private( persona_id: int, user_ids: list[UUID] | None, @@ -62,25 +152,16 @@ def create_update_persona( ) -> PersonaSnapshot: """Higher level function than upsert_persona, although either is valid to use.""" # Permission to actually use these is checked later + try: - persona = upsert_persona( - persona_id=persona_id, - user=user, - name=create_persona_request.name, - description=create_persona_request.description, - num_chunks=create_persona_request.num_chunks, - llm_relevance_filter=create_persona_request.llm_relevance_filter, - llm_filter_extraction=create_persona_request.llm_filter_extraction, - recency_bias=create_persona_request.recency_bias, - prompt_ids=create_persona_request.prompt_ids, - tool_ids=create_persona_request.tool_ids, - document_set_ids=create_persona_request.document_set_ids, - llm_model_provider_override=create_persona_request.llm_model_provider_override, - llm_model_version_override=create_persona_request.llm_model_version_override, - starter_messages=create_persona_request.starter_messages, - is_public=create_persona_request.is_public, - db_session=db_session, - ) + persona_data = { + "persona_id": persona_id, + "user": user, + "db_session": db_session, + **create_persona_request.dict(exclude={"users", "groups"}), + } + + persona = upsert_persona(**persona_data) versioned_make_persona_private = fetch_versioned_implementation( "danswer.db.persona", "make_persona_private" @@ -109,13 +190,9 @@ def update_persona_shared_users( """Simplified version of `create_update_persona` which only touches the accessibility rather than any of the logic (e.g. 
prompt, connected data sources, etc.).""" - persona = fetch_persona_by_id(db_session=db_session, persona_id=persona_id) - if not persona: - raise HTTPException( - status_code=404, detail=f"Persona with ID {persona_id} not found" - ) - - check_user_can_edit_persona(user=user, persona=persona) + persona = fetch_persona_by_id( + db_session=db_session, persona_id=persona_id, user=user, get_editable=True + ) if persona.is_public: raise HTTPException(status_code=400, detail="Cannot share public persona") @@ -133,10 +210,6 @@ def update_persona_shared_users( ) -def fetch_persona_by_id(db_session: Session, persona_id: int) -> Persona | None: - return db_session.scalar(select(Persona).where(Persona.id == persona_id)) - - def get_prompts( user_id: UUID | None, db_session: Session, @@ -156,35 +229,17 @@ def get_prompts( def get_personas( - # if user_id is `None` assume the user is an admin or auth is disabled - user_id: UUID | None, + # if user is `None` assume the user is an admin or auth is disabled + user: User | None, db_session: Session, + get_editable: bool = True, include_default: bool = True, include_slack_bot_personas: bool = False, include_deleted: bool = False, + joinedload_all: bool = False, ) -> Sequence[Persona]: stmt = select(Persona).distinct() - if user_id is not None: - # Subquery to find all groups the user belongs to - user_groups_subquery = ( - select(User__UserGroup.user_group_id) - .where(User__UserGroup.user_id == user_id) - .subquery() - ) - - # Include personas where the user is directly related or part of a user group that has access - access_conditions = or_( - Persona.is_public == True, # noqa: E712 - Persona.id.in_( # User has access through list of users with access - select(Persona__User.persona_id).where(Persona__User.user_id == user_id) - ), - Persona.id.in_( # User is part of a group that has access - select(Persona__UserGroup.persona_id).where( - Persona__UserGroup.user_group_id.in_(user_groups_subquery) # type: ignore - ) - ), - ) - stmt = stmt.where(access_conditions) + stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable) if not include_default: stmt = stmt.where(Persona.default_persona.is_(False)) @@ -193,7 +248,16 @@ def get_personas( if not include_deleted: stmt = stmt.where(Persona.deleted.is_(False)) - return db_session.scalars(stmt).all() + if joinedload_all: + stmt = stmt.options( + joinedload(Persona.prompts), + joinedload(Persona.tools), + joinedload(Persona.document_sets), + joinedload(Persona.groups), + joinedload(Persona.users), + ) + + return db_session.execute(stmt).unique().scalars().all() def mark_persona_as_deleted( @@ -239,7 +303,7 @@ def update_all_personas_display_priority( db_session: Session, ) -> None: """Updates the display priority of all lives Personas""" - personas = get_personas(user_id=None, db_session=db_session) + personas = get_personas(user=None, db_session=db_session) available_persona_ids = {persona.id for persona in personas} if available_persona_ids != set(display_priority_map.keys()): raise ValueError("Invalid persona IDs provided") @@ -328,11 +392,19 @@ def upsert_persona( persona_id: int | None = None, default_persona: bool = False, commit: bool = True, + icon_color: str | None = None, + icon_shape: int | None = None, + uploaded_image_id: str | None = None, + display_priority: int | None = None, + is_visible: bool = True, + remove_image: bool | None = None, + chunks_above: int = CONTEXT_CHUNKS_ABOVE, + chunks_below: int = CONTEXT_CHUNKS_BELOW, ) -> Persona: if persona_id is not None: persona = 
db_session.query(Persona).filter_by(id=persona_id).first() else: - persona = get_persona_by_name( + persona = _get_persona_by_name( persona_name=name, user=user, db_session=db_session ) @@ -369,11 +441,16 @@ def upsert_persona( if not default_persona and persona.default_persona: raise ValueError("Cannot update default persona with non-default.") - check_user_can_edit_persona(user=user, persona=persona) + # this checks if the user has permission to edit the persona + persona = fetch_persona_by_id( + db_session=db_session, persona_id=persona.id, user=user, get_editable=True + ) persona.name = name persona.description = description persona.num_chunks = num_chunks + persona.chunks_above = chunks_above + persona.chunks_below = chunks_below persona.llm_relevance_filter = llm_relevance_filter persona.llm_filter_extraction = llm_filter_extraction persona.recency_bias = recency_bias @@ -383,6 +460,12 @@ def upsert_persona( persona.starter_messages = starter_messages persona.deleted = False # Un-delete if previously deleted persona.is_public = is_public + persona.icon_color = icon_color + persona.icon_shape = icon_shape + if remove_image or uploaded_image_id: + persona.uploaded_image_id = uploaded_image_id + persona.display_priority = display_priority + persona.is_visible = is_visible # Do not delete any associations manually added unless # a new updated list is provided @@ -405,6 +488,8 @@ def upsert_persona( name=name, description=description, num_chunks=num_chunks, + chunks_above=chunks_above, + chunks_below=chunks_below, llm_relevance_filter=llm_relevance_filter, llm_filter_extraction=llm_filter_extraction, recency_bias=recency_bias, @@ -415,6 +500,11 @@ def upsert_persona( llm_model_version_override=llm_model_version_override, starter_messages=starter_messages, tools=tools or [], + icon_shape=icon_shape, + icon_color=icon_color, + uploaded_image_id=uploaded_image_id, + display_priority=display_priority, + is_visible=is_visible, ) db_session.add(persona) @@ -456,8 +546,11 @@ def update_persona_visibility( persona_id: int, is_visible: bool, db_session: Session, + user: User | None = None, ) -> None: - persona = get_persona_by_id(persona_id=persona_id, user=None, db_session=db_session) + persona = fetch_persona_by_id( + db_session=db_session, persona_id=persona_id, user=user, get_editable=True + ) persona.is_visible = is_visible db_session.commit() @@ -470,23 +563,6 @@ def validate_persona_tools(tools: list[Tool]) -> None: ) -def check_user_can_edit_persona(user: User | None, persona: Persona) -> None: - # if user is None, assume that no-auth is turned on - if user is None: - return - - # admins can edit everything - if user.role == UserRole.ADMIN: - return - - # otherwise, make sure user owns persona - if persona.user_id != user.id: - raise HTTPException( - status_code=403, - detail=f"User not authorized to edit persona with ID {persona.id}", - ) - - def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> Sequence[Prompt]: """Unsafe, can fetch prompts from all users""" if not prompt_ids: @@ -548,6 +624,8 @@ def get_default_prompt__read_only() -> Prompt: return _get_default_prompt(db_session) +# TODO: since this gets called with every chat message, could it be more efficient to pregenerate +# a direct mapping indicating whether a user has access to a specific persona? 
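One possible shape for the pregenerated mapping that TODO describes, sketched only as an illustration (build_user_persona_access_map, user_can_access_persona, and the module-level cache are hypothetical names, and group-derived access is omitted for brevity; the sketch just reuses the get_personas helper defined earlier in this file):

from uuid import UUID

from sqlalchemy.orm import Session

# Hypothetical cache mapping a user id (None = public) to the persona ids they may read.
_persona_access_cache: dict[UUID | None, set[int]] = {}


def build_user_persona_access_map(db_session: Session) -> dict[UUID | None, set[int]]:
    """Precompute read access for every persona in one pass over get_personas."""
    access_map: dict[UUID | None, set[int]] = {}
    for persona in get_personas(
        user=None, db_session=db_session, get_editable=False, joinedload_all=True
    ):
        if persona.is_public:
            access_map.setdefault(None, set()).add(persona.id)
        if persona.user_id is not None:
            access_map.setdefault(persona.user_id, set()).add(persona.id)
        for shared_user in persona.users:
            access_map.setdefault(shared_user.id, set()).add(persona.id)
    return access_map


def user_can_access_persona(user_id: UUID | None, persona_id: int) -> bool:
    # A set-membership check replaces the per-message permission query; the map has to be
    # rebuilt or invalidated whenever personas or their sharing lists change.
    return persona_id in _persona_access_cache.get(
        user_id, set()
    ) or persona_id in _persona_access_cache.get(None, set())

The trade-off is cache freshness: any persona create, share, or visibility change would have to refresh the map, which is presumably why the TODO is left as an open question.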
def get_persona_by_id( persona_id: int, # if user is `None` assume the user is an admin or auth is disabled @@ -556,32 +634,53 @@ def get_persona_by_id( include_deleted: bool = False, is_for_edit: bool = True, # NOTE: assume true for safety ) -> Persona: - stmt = select(Persona).where(Persona.id == persona_id) - - or_conditions = [] - - # if user is an admin, they should have access to all Personas - if user is not None and user.role != UserRole.ADMIN: - or_conditions.extend([Persona.user_id == user.id, Persona.user_id.is_(None)]) - - # if we aren't editing, also give access to all public personas - if not is_for_edit: - or_conditions.append(Persona.is_public.is_(True)) - - if or_conditions: - stmt = stmt.where(or_(*or_conditions)) + persona_stmt = ( + select(Persona) + .distinct() + .outerjoin(Persona.groups) + .outerjoin(Persona.users) + .outerjoin(UserGroup.user_group_relationships) + .where(Persona.id == persona_id) + ) if not include_deleted: - stmt = stmt.where(Persona.deleted.is_(False)) + persona_stmt = persona_stmt.where(Persona.deleted.is_(False)) - result = db_session.execute(stmt) - persona = result.scalar_one_or_none() + if not user or user.role == UserRole.ADMIN: + result = db_session.execute(persona_stmt) + persona = result.scalar_one_or_none() + if persona is None: + raise ValueError( + f"Persona with ID {persona_id} does not exist or does not belong to user" + ) + return persona + + # or check if user owns persona + or_conditions = Persona.user_id == user.id + # allow access if persona user id is None + or_conditions |= Persona.user_id == None # noqa: E711 + if not is_for_edit: + # if the user is in a group related to the persona + or_conditions |= User__UserGroup.user_id == user.id + # if the user is in the .users of the persona + or_conditions |= User.id == user.id + or_conditions |= Persona.is_public == True # noqa: E712 + elif user.role == UserRole.GLOBAL_CURATOR: + # global curators can edit personas for the groups they are in + or_conditions |= User__UserGroup.user_id == user.id + elif user.role == UserRole.CURATOR: + # curators can edit personas for the groups they are curators of + or_conditions |= (User__UserGroup.user_id == user.id) & ( + User__UserGroup.is_curator == True # noqa: E712 + ) + persona_stmt = persona_stmt.where(or_conditions) + result = db_session.execute(persona_stmt) + persona = result.scalar_one_or_none() if persona is None: raise ValueError( f"Persona with ID {persona_id} does not exist or does not belong to user" ) - return persona @@ -612,18 +711,6 @@ def get_prompt_by_name( return result -def get_persona_by_name( - persona_name: str, user: User | None, db_session: Session -) -> Persona | None: - """Admins can see all, regular users can only fetch their own. 
- If user is None, assume the user is an admin or auth is disabled.""" - stmt = select(Persona).where(Persona.name == persona_name) - if user and user.role != UserRole.ADMIN: - stmt = stmt.where(Persona.user_id == user.id) - result = db_session.execute(stmt).scalar_one_or_none() - return result - - def delete_persona_by_name( persona_name: str, db_session: Session, is_default: bool = True ) -> None: diff --git a/backend/danswer/db/search_settings.py b/backend/danswer/db/search_settings.py new file mode 100644 index 00000000000..1d0c218e10a --- /dev/null +++ b/backend/danswer/db/search_settings.py @@ -0,0 +1,249 @@ +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.configs.model_configs import ASYM_PASSAGE_PREFIX +from danswer.configs.model_configs import ASYM_QUERY_PREFIX +from danswer.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import DOC_EMBEDDING_DIM +from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import NORMALIZE_EMBEDDINGS +from danswer.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM +from danswer.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.llm import fetch_embedding_provider +from danswer.db.models import CloudEmbeddingProvider +from danswer.db.models import IndexModelStatus +from danswer.db.models import SearchSettings +from danswer.indexing.models import IndexingSetting +from danswer.natural_language_processing.search_nlp_models import clean_model_name +from danswer.search.models import SavedSearchSettings +from danswer.server.manage.embedding.models import ( + CloudEmbeddingProvider as ServerCloudEmbeddingProvider, +) +from danswer.utils.logger import setup_logger +from shared_configs.configs import PRESERVED_SEARCH_FIELDS +from shared_configs.enums import EmbeddingProvider + +logger = setup_logger() + + +def create_search_settings( + search_settings: SavedSearchSettings, + db_session: Session, + status: IndexModelStatus = IndexModelStatus.FUTURE, +) -> SearchSettings: + embedding_model = SearchSettings( + model_name=search_settings.model_name, + model_dim=search_settings.model_dim, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + status=status, + index_name=search_settings.index_name, + provider_type=search_settings.provider_type, + multipass_indexing=search_settings.multipass_indexing, + multilingual_expansion=search_settings.multilingual_expansion, + disable_rerank_for_streaming=search_settings.disable_rerank_for_streaming, + rerank_model_name=search_settings.rerank_model_name, + rerank_provider_type=search_settings.rerank_provider_type, + rerank_api_key=search_settings.rerank_api_key, + num_rerank=search_settings.num_rerank, + ) + + db_session.add(embedding_model) + db_session.commit() + + return embedding_model + + +def get_embedding_provider_from_provider_type( + db_session: Session, provider_type: EmbeddingProvider +) -> CloudEmbeddingProvider | None: + query = select(CloudEmbeddingProvider).where( + CloudEmbeddingProvider.provider_type == provider_type + ) + provider = db_session.execute(query).scalars().first() + return provider if provider else None + + +def get_current_db_embedding_provider( + db_session: Session, +) -> ServerCloudEmbeddingProvider | None: + 
search_settings = get_current_search_settings(db_session=db_session) + + if search_settings.provider_type is None: + return None + + embedding_provider = fetch_embedding_provider( + db_session=db_session, + provider_type=search_settings.provider_type, + ) + if embedding_provider is None: + raise RuntimeError("No embedding provider exists for this model.") + + current_embedding_provider = ServerCloudEmbeddingProvider.from_request( + cloud_provider_model=embedding_provider + ) + + return current_embedding_provider + + +def get_current_search_settings(db_session: Session) -> SearchSettings: + query = ( + select(SearchSettings) + .where(SearchSettings.status == IndexModelStatus.PRESENT) + .order_by(SearchSettings.id.desc()) + ) + result = db_session.execute(query) + latest_settings = result.scalars().first() + + if not latest_settings: + raise RuntimeError("No search settings specified, DB is not in a valid state") + return latest_settings + + +def get_secondary_search_settings(db_session: Session) -> SearchSettings | None: + query = ( + select(SearchSettings) + .where(SearchSettings.status == IndexModelStatus.FUTURE) + .order_by(SearchSettings.id.desc()) + ) + result = db_session.execute(query) + latest_settings = result.scalars().first() + + return latest_settings + + +def get_multilingual_expansion(db_session: Session | None = None) -> list[str]: + if db_session is None: + with Session(get_sqlalchemy_engine()) as db_session: + search_settings = get_current_search_settings(db_session) + else: + search_settings = get_current_search_settings(db_session) + if not search_settings: + return [] + return search_settings.multilingual_expansion + + +def update_search_settings( + current_settings: SearchSettings, + updated_settings: SavedSearchSettings, + preserved_fields: list[str], +) -> None: + for field, value in updated_settings.dict().items(): + if field not in preserved_fields: + setattr(current_settings, field, value) + + +def update_current_search_settings( + db_session: Session, + search_settings: SavedSearchSettings, + preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS, +) -> None: + current_settings = get_current_search_settings(db_session) + if not current_settings: + logger.warning("No current search settings found to update") + return + + update_search_settings(current_settings, search_settings, preserved_fields) + db_session.commit() + logger.info("Current search settings updated successfully") + + +def update_secondary_search_settings( + db_session: Session, + search_settings: SavedSearchSettings, + preserved_fields: list[str] = PRESERVED_SEARCH_FIELDS, +) -> None: + secondary_settings = get_secondary_search_settings(db_session) + if not secondary_settings: + logger.warning("No secondary search settings found to update") + return + + preserved_fields = PRESERVED_SEARCH_FIELDS + update_search_settings(secondary_settings, search_settings, preserved_fields) + + db_session.commit() + logger.info("Secondary search settings updated successfully") + + +def update_search_settings_status( + search_settings: SearchSettings, new_status: IndexModelStatus, db_session: Session +) -> None: + search_settings.status = new_status + db_session.commit() + + +def user_has_overridden_embedding_model() -> bool: + return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL + + +def get_old_default_search_settings() -> SearchSettings: + is_overridden = user_has_overridden_embedding_model() + return SearchSettings( + model_name=( + DOCUMENT_ENCODER_MODEL + if is_overridden + else 
OLD_DEFAULT_DOCUMENT_ENCODER_MODEL + ), + model_dim=( + DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM + ), + normalize=( + NORMALIZE_EMBEDDINGS + if is_overridden + else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS + ), + query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""), + passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""), + status=IndexModelStatus.PRESENT, + index_name="danswer_chunk", + ) + + +def get_new_default_search_settings(is_present: bool) -> SearchSettings: + return SearchSettings( + model_name=DOCUMENT_ENCODER_MODEL, + model_dim=DOC_EMBEDDING_DIM, + normalize=NORMALIZE_EMBEDDINGS, + query_prefix=ASYM_QUERY_PREFIX, + passage_prefix=ASYM_PASSAGE_PREFIX, + status=IndexModelStatus.PRESENT if is_present else IndexModelStatus.FUTURE, + index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}", + ) + + +def get_old_default_embedding_model() -> IndexingSetting: + is_overridden = user_has_overridden_embedding_model() + return IndexingSetting( + model_name=( + DOCUMENT_ENCODER_MODEL + if is_overridden + else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL + ), + model_dim=( + DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM + ), + normalize=( + NORMALIZE_EMBEDDINGS + if is_overridden + else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS + ), + query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""), + passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""), + index_name="danswer_chunk", + multipass_indexing=False, + ) + + +def get_new_default_embedding_model() -> IndexingSetting: + return IndexingSetting( + model_name=DOCUMENT_ENCODER_MODEL, + model_dim=DOC_EMBEDDING_DIM, + normalize=NORMALIZE_EMBEDDINGS, + query_prefix=ASYM_QUERY_PREFIX, + passage_prefix=ASYM_PASSAGE_PREFIX, + index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}", + multipass_indexing=False, + ) diff --git a/backend/danswer/db/slack_bot_config.py b/backend/danswer/db/slack_bot_config.py index 502d99608f9..322dc4c4ed9 100644 --- a/backend/danswer/db/slack_bot_config.py +++ b/backend/danswer/db/slack_bot_config.py @@ -40,6 +40,7 @@ def create_slack_bot_persona( document_set_ids: list[int], existing_persona_id: int | None = None, num_chunks: float = MAX_CHUNKS_FED_TO_CHAT, + enable_auto_filters: bool = False, ) -> Persona: """NOTE: does not commit changes""" @@ -53,7 +54,7 @@ def create_slack_bot_persona( description="", num_chunks=num_chunks, llm_relevance_filter=True, - llm_filter_extraction=False, + llm_filter_extraction=enable_auto_filters, recency_bias=RecencyBiasSetting.AUTO, prompt_ids=[default_prompt.id], document_set_ids=document_set_ids, diff --git a/backend/danswer/db/standard_answer.py b/backend/danswer/db/standard_answer.py index 3b56c52e641..064a5fa59ef 100644 --- a/backend/danswer/db/standard_answer.py +++ b/backend/danswer/db/standard_answer.py @@ -140,6 +140,17 @@ def fetch_standard_answer_category( ) +def fetch_standard_answer_categories_by_names( + standard_answer_category_names: list[str], + db_session: Session, +) -> Sequence[StandardAnswerCategory]: + return db_session.scalars( + select(StandardAnswerCategory).where( + StandardAnswerCategory.name.in_(standard_answer_category_names) + ) + ).all() + + def fetch_standard_answer_categories_by_ids( standard_answer_category_ids: list[int], db_session: Session, diff --git a/backend/danswer/db/swap_index.py b/backend/danswer/db/swap_index.py index f14a45f296e..8f6d1718924 100644 --- a/backend/danswer/db/swap_index.py +++ b/backend/danswer/db/swap_index.py @@ -1,15 +1,17 @@ 
from sqlalchemy.orm import Session +from danswer.configs.constants import KV_REINDEX_KEY from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.connector_credential_pair import resync_cc_pair -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model -from danswer.db.embedding_model import update_embedding_model_status from danswer.db.enums import IndexModelStatus from danswer.db.index_attempt import cancel_indexing_attempts_past_model from danswer.db.index_attempt import ( count_unique_cc_pairs_with_successful_index_attempts, ) +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.search_settings import update_search_settings_status +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.utils.logger import setup_logger logger = setup_logger() @@ -22,13 +24,13 @@ def check_index_swap(db_session: Session) -> None: # Default CC-pair created for Ingestion API unused here all_cc_pairs = get_connector_credential_pairs(db_session) cc_pair_count = max(len(all_cc_pairs) - 1, 0) - embedding_model = get_secondary_db_embedding_model(db_session) + search_settings = get_secondary_search_settings(db_session) - if not embedding_model: + if not search_settings: return unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts( - embedding_model_id=embedding_model.id, db_session=db_session + search_settings_id=search_settings.id, db_session=db_session ) # Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this @@ -38,20 +40,23 @@ def check_index_swap(db_session: Session) -> None: if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings: # Swap indices - now_old_embedding_model = get_current_db_embedding_model(db_session) - update_embedding_model_status( - embedding_model=now_old_embedding_model, + now_old_search_settings = get_current_search_settings(db_session) + update_search_settings_status( + search_settings=now_old_search_settings, new_status=IndexModelStatus.PAST, db_session=db_session, ) - update_embedding_model_status( - embedding_model=embedding_model, + update_search_settings_status( + search_settings=search_settings, new_status=IndexModelStatus.PRESENT, db_session=db_session, ) if cc_pair_count > 0: + kv_store = get_dynamic_config_store() + kv_store.store(KV_REINDEX_KEY, False) + # Expire jobs for the now past index/embedding model cancel_indexing_attempts_past_model(db_session) diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py index 66418b948e7..688b8a11272 100644 --- a/backend/danswer/db/tag.py +++ b/backend/danswer/db/tag.py @@ -1,5 +1,6 @@ from sqlalchemy import delete from sqlalchemy import func +from sqlalchemy import or_ from sqlalchemy import select from sqlalchemy.orm import Session @@ -107,18 +108,28 @@ def create_or_add_document_tag_list( def get_tags_by_value_prefix_for_source_types( + tag_key_prefix: str | None, tag_value_prefix: str | None, sources: list[DocumentSource] | None, + limit: int | None, db_session: Session, ) -> list[Tag]: query = select(Tag) - if tag_value_prefix: - query = query.where(Tag.tag_value.startswith(tag_value_prefix)) + if tag_key_prefix or tag_value_prefix: + conditions = [] + if tag_key_prefix: + conditions.append(Tag.tag_key.ilike(f"{tag_key_prefix}%")) + if tag_value_prefix: + 
conditions.append(Tag.tag_value.ilike(f"{tag_value_prefix}%")) + query = query.where(or_(*conditions)) if sources: query = query.where(Tag.source.in_(sources)) + if limit: + query = query.limit(limit) + result = db_session.execute(query) tags = result.scalars().all() diff --git a/backend/danswer/db/users.py b/backend/danswer/db/users.py index f8a3938027f..d824ccfd921 100644 --- a/backend/danswer/db/users.py +++ b/backend/danswer/db/users.py @@ -1,21 +1,32 @@ from collections.abc import Sequence +from uuid import UUID +from sqlalchemy import select from sqlalchemy.orm import Session -from sqlalchemy.schema import Column from danswer.db.models import User -def list_users(db_session: Session, q: str = "") -> Sequence[User]: +def list_users( + db_session: Session, email_filter_string: str = "", user: User | None = None +) -> Sequence[User]: """List all users. No pagination as of now, as the # of users is assumed to be relatively small (<< 1 million)""" - query = db_session.query(User) - if q: - query = query.filter(Column("email").ilike("%{}%".format(q))) - return query.all() + stmt = select(User) + + if email_filter_string: + stmt = stmt.where(User.email.ilike(f"%{email_filter_string}%")) # type: ignore + + return db_session.scalars(stmt).unique().all() def get_user_by_email(email: str, db_session: Session) -> User | None: user = db_session.query(User).filter(User.email == email).first() # type: ignore return user + + +def fetch_user_by_id(db_session: Session, user_id: UUID) -> User | None: + user = db_session.query(User).filter(User.id == user_id).first() # type: ignore + + return user diff --git a/backend/danswer/document_index/document_index_utils.py b/backend/danswer/document_index/document_index_utils.py index 271fd0cc2e7..fab7b85ef48 100644 --- a/backend/danswer/document_index/document_index_utils.py +++ b/backend/danswer/document_index/document_index_utils.py @@ -3,8 +3,8 @@ from sqlalchemy.orm import Session -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings from danswer.indexing.models import IndexChunk from danswer.search.models import InferenceChunk @@ -14,13 +14,13 @@ def get_both_index_names(db_session: Session) -> tuple[str, str | None]: - model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) - model_new = get_secondary_db_embedding_model(db_session) - if not model_new: - return model.index_name, None + search_settings_new = get_secondary_search_settings(db_session) + if not search_settings_new: + return search_settings.index_name, None - return model.index_name, model_new.index_name + return search_settings.index_name, search_settings_new.index_name def translate_boost_count_to_multiplier(boost: int) -> float: @@ -50,4 +50,11 @@ def get_uuid_from_chunk( unique_identifier_string = "_".join( [doc_str, str(chunk.chunk_id), str(mini_chunk_ind)] ) + if chunk.large_chunk_reference_ids: + unique_identifier_string += "_large" + "_".join( + [ + str(referenced_chunk_id) + for referenced_chunk_id in chunk.large_chunk_reference_ids + ] + ) return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string) diff --git a/backend/danswer/document_index/interfaces.py b/backend/danswer/document_index/interfaces.py index 6adedd45268..2acd0977959 100644 --- a/backend/danswer/document_index/interfaces.py +++ 
b/backend/danswer/document_index/interfaces.py @@ -6,7 +6,8 @@ from danswer.access.models import DocumentAccess from danswer.indexing.models import DocMetadataAwareIndexChunk from danswer.search.models import IndexFilters -from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceChunkUncleaned +from shared_configs.model_server_models import Embedding @dataclass(frozen=True) @@ -15,6 +16,25 @@ class DocumentInsertionRecord: already_existed: bool +@dataclass(frozen=True) +class VespaChunkRequest: + document_id: str + min_chunk_ind: int | None = None + max_chunk_ind: int | None = None + + @property + def is_capped(self) -> bool: + # If the max chunk index is not None, then the chunk request is capped + # If the min chunk index is None, we can assume the min is 0 + return self.max_chunk_ind is not None + + @property + def range(self) -> int | None: + if self.max_chunk_ind is not None: + return (self.max_chunk_ind - (self.min_chunk_ind or 0)) + 1 + return None + + @dataclass class DocumentMetadata: """ @@ -182,11 +202,10 @@ class IdRetrievalCapable(abc.ABC): @abc.abstractmethod def id_based_retrieval( self, - document_id: str, - min_chunk_ind: int | None, - max_chunk_ind: int | None, - user_access_control_list: list[str] | None = None, - ) -> list[InferenceChunk]: + chunk_requests: list[VespaChunkRequest], + filters: IndexFilters, + batch_retrieval: bool = False, + ) -> list[InferenceChunkUncleaned]: """ Fetch chunk(s) based on document id @@ -196,11 +215,9 @@ def id_based_retrieval( or extended section will have duplicate segments. Parameters: - - document_id: document id for which to retrieve the chunk(s) - - min_chunk_ind: if None then fetch from the start of doc - - max_chunk_ind: - - filters: standard filters object, in this case only the access filter is applied as a - permission check + - chunk_requests: requests containing the document id and the chunk range to retrieve + - filters: Filters to apply to retrieval + - batch_retrieval: If True, perform a batch retrieval Returns: list of chunks for the document id or the specific chunk by the specified chunk index @@ -209,80 +226,6 @@ def id_based_retrieval( raise NotImplementedError -class KeywordCapable(abc.ABC): - """ - Class must implement the keyword search functionality - """ - - @abc.abstractmethod - def keyword_retrieval( - self, - query: str, - filters: IndexFilters, - time_decay_multiplier: float, - num_to_retrieve: int, - offset: int = 0, - ) -> list[InferenceChunk]: - """ - Run keyword search and return a list of chunks. Inference chunks are chunks with all of the - information required for query time purposes. For example, some details of the document - required at indexing time are no longer needed past this point. At the same time, the - matching keywords need to be highlighted. - - NOTE: the query passed in here is the unprocessed plain text query. Preprocessing is - expected to be handled by this function as it may depend on the index implementation. - Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are - done here. - - Parameters: - - query: unmodified user query - - filters: standard filter object - - time_decay_multiplier: how much to decay the document scores as they age. 
Some queries - based on the persona settings, will have this be a 2x or 3x of the default - - num_to_retrieve: number of highest matching chunks to return - - offset: number of highest matching chunks to skip (kind of like pagination) - - Returns: - best matching chunks based on keyword matching (should be BM25 algorithm ideally) - """ - raise NotImplementedError - - -class VectorCapable(abc.ABC): - """ - Class must implement the vector/semantic search functionality - """ - - @abc.abstractmethod - def semantic_retrieval( - self, - query: str, # Needed for matching purposes - query_embedding: list[float], - filters: IndexFilters, - time_decay_multiplier: float, - num_to_retrieve: int, - offset: int = 0, - ) -> list[InferenceChunk]: - """ - Run vector/semantic search and return a list of inference chunks. - - Parameters: - - query: unmodified user query. This is needed for getting the matching highlighted - keywords - - query_embedding: vector representation of the query, must be of the correct - dimensionality for the primary index - - filters: standard filter object - - time_decay_multiplier: how much to decay the document scores as they age. Some queries - based on the persona settings, will have this be a 2x or 3x of the default - - num_to_retrieve: number of highest matching chunks to return - - offset: number of highest matching chunks to skip (kind of like pagination) - - Returns: - best matching chunks based on vector similarity - """ - raise NotImplementedError - - class HybridCapable(abc.ABC): """ Class must implement hybrid (keyword + vector) search functionality @@ -292,13 +235,14 @@ class HybridCapable(abc.ABC): def hybrid_retrieval( self, query: str, - query_embedding: list[float], + query_embedding: Embedding, + final_keywords: list[str] | None, filters: IndexFilters, + hybrid_alpha: float, time_decay_multiplier: float, num_to_retrieve: int, offset: int = 0, - hybrid_alpha: float | None = None, - ) -> list[InferenceChunk]: + ) -> list[InferenceChunkUncleaned]: """ Run hybrid search and return a list of inference chunks. @@ -312,15 +256,16 @@ def hybrid_retrieval( keywords - query_embedding: vector representation of the query, must be of the correct dimensionality for the primary index + - final_keywords: Final keywords to be used from the query, defaults to query if not set - filters: standard filter object - - time_decay_multiplier: how much to decay the document scores as they age. Some queries - based on the persona settings, will have this be a 2x or 3x of the default - - num_to_retrieve: number of highest matching chunks to return - - offset: number of highest matching chunks to skip (kind of like pagination) - hybrid_alpha: weighting between the keyword and vector search results. It is important that the two scores are normalized to the same range so that a meaningful comparison can be made. 1 for 100% weighting on vector score, 0 for 100% weighting on keyword score. + - time_decay_multiplier: how much to decay the document scores as they age. 
Some queries + based on the persona settings, will have this be a 2x or 3x of the default + - num_to_retrieve: number of highest matching chunks to return + - offset: number of highest matching chunks to skip (kind of like pagination) Returns: best matching chunks based on weighted sum of keyword and vector/semantic search scores @@ -348,7 +293,7 @@ def admin_retrieval( filters: IndexFilters, num_to_retrieve: int, offset: int = 0, - ) -> list[InferenceChunk]: + ) -> list[InferenceChunkUncleaned]: """ Run the special search for the admin document explorer page @@ -386,7 +331,7 @@ class BaseIndex( """ -class DocumentIndex(KeywordCapable, VectorCapable, HybridCapable, BaseIndex, abc.ABC): +class DocumentIndex(HybridCapable, BaseIndex, abc.ABC): """ A valid document index that can plug into all Danswer flows must implement all of these functionalities, though "technically" it does not need to be keyword or vector capable as diff --git a/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd index 355918506a0..be279f6a611 100644 --- a/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd +++ b/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd @@ -20,18 +20,10 @@ schema DANSWER_CHUNK_NAME { # `semantic_identifier` will be the channel name, but the `title` will be empty field title type string { indexing: summary | index | attribute - match { - gram - gram-size: 3 - } index: enable-bm25 } field content type string { indexing: summary | index - match { - gram - gram-size: 3 - } index: enable-bm25 } # duplication of `content` is far from ideal, but is needed for @@ -88,9 +80,16 @@ schema DANSWER_CHUNK_NAME { rank:filter attribute: fast-search } + # If chunk is a large chunk, this will contain the ids of the smaller chunks + field large_chunk_reference_ids type array { + indexing: summary | attribute + } field metadata type string { indexing: summary | attribute } + field metadata_suffix type string { + indexing: summary | attribute + } field doc_updated_at type int { indexing: summary | attribute } @@ -150,43 +149,45 @@ schema DANSWER_CHUNK_NAME { query(query_embedding) tensor(x[VARIABLE_DIM]) } - # This must be separate function for normalize_linear to work - function vector_score() { - expression { - # If no title, the full vector score comes from the content embedding - (query(title_content_ratio) * if(attribute(skip_title), closeness(field, embeddings), closeness(field, title_embedding))) + - ((1 - query(title_content_ratio)) * closeness(field, embeddings)) - } - } - - # This must be separate function for normalize_linear to work - function keyword_score() { + function title_vector_score() { expression { - (query(title_content_ratio) * bm25(title)) + - ((1 - query(title_content_ratio)) * bm25(content)) + # If no good matching titles, then it should use the context embeddings rather than having some + # irrelevant title have a vector score of 1. 
This way at least it will be the doc with the highest + # matching content score getting the full score + max(closeness(field, embeddings), closeness(field, title_embedding)) } } + # First phase must be vector to allow hits that have no keyword matches first-phase { - expression: vector_score + expression: closeness(field, embeddings) } # Weighted average between Vector Search and BM-25 - # Each is a weighted average between the Title and Content fields - # Finally each doc is boosted by it's user feedback based boost and recency - # If any embedding or index field is missing, it just receives a score of 0 - # Assumptions: - # - For a given query + corpus, the BM-25 scores will be relatively similar in distribution - # therefore not normalizing before combining. - # - For documents without title, it gets a score of 0 for that and this is ok as documents - # without any title match should be penalized. global-phase { expression { ( # Weighted Vector Similarity Score - (query(alpha) * normalize_linear(vector_score)) + + ( + query(alpha) * ( + (query(title_content_ratio) * normalize_linear(title_vector_score)) + + + ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings))) + ) + ) + + + + # Weighted Keyword Similarity Score - ((1 - query(alpha)) * normalize_linear(keyword_score)) + # Note: for the BM25 Title score, it requires decent stopword removal in the query + # This needs to be the case so there aren't irrelevant titles being normalized to a score of 1 + ( + (1 - query(alpha)) * ( + (query(title_content_ratio) * normalize_linear(bm25(title))) + + + ((1 - query(title_content_ratio)) * normalize_linear(bm25(content))) + ) + ) ) # Boost based on user feedback * document_boost @@ -201,8 +202,6 @@ schema DANSWER_CHUNK_NAME { bm25(content) closeness(field, title_embedding) closeness(field, embeddings) - keyword_score - vector_score document_boost recency_bias closest(embeddings) @@ -216,28 +215,4 @@ schema DANSWER_CHUNK_NAME { expression: bm25(content) + (5 * bm25(title)) } } - - # THE ONES BELOW ARE OUT OF DATE, DO NOT USE - # THEY MIGHT NOT EVEN WORK AT ALL - rank-profile keyword_search inherits default, default_rank { - first-phase { - expression: bm25(content) * document_boost * recency_bias - } - - match-features: recency_bias document_boost bm25(content) - } - - rank-profile semantic_searchVARIABLE_DIM inherits default, default_rank { - inputs { - query(query_embedding) tensor(x[VARIABLE_DIM]) - } - - first-phase { - # Cannot do boost with the chosen embedding model because of high default similarity - # This depends on the embedding model chosen - expression: closeness(field, embeddings) - } - - match-features: recency_bias document_boost closest(embeddings) - } } diff --git a/backend/danswer/document_index/vespa/app_config/validation-overrides.xml b/backend/danswer/document_index/vespa/app_config/validation-overrides.xml index 58bb2a0ce71..d1ac1c119e5 100644 --- a/backend/danswer/document_index/vespa/app_config/validation-overrides.xml +++ b/backend/danswer/document_index/vespa/app_config/validation-overrides.xml @@ -2,4 +2,7 @@ schema-removal + indexing-change diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py new file mode 100644 index 00000000000..6a7427630b8 --- /dev/null +++ b/backend/danswer/document_index/vespa/chunk_retrieval.py @@ -0,0 +1,424 @@ +import json +import string +from collections.abc import Callable +from collections.abc import Mapping +from datetime import 
datetime +from datetime import timezone +from typing import Any +from typing import cast + +import requests +from retry import retry + +from danswer.configs.app_configs import LOG_VESPA_TIMING_INFORMATION +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.vespa.shared_utils.vespa_request_builders import ( + build_vespa_filters, +) +from danswer.document_index.vespa.shared_utils.vespa_request_builders import ( + build_vespa_id_based_retrieval_yql, +) +from danswer.document_index.vespa_constants import ACCESS_CONTROL_LIST +from danswer.document_index.vespa_constants import BLURB +from danswer.document_index.vespa_constants import BOOST +from danswer.document_index.vespa_constants import CHUNK_ID +from danswer.document_index.vespa_constants import CONTENT +from danswer.document_index.vespa_constants import CONTENT_SUMMARY +from danswer.document_index.vespa_constants import DOC_UPDATED_AT +from danswer.document_index.vespa_constants import DOCUMENT_ID +from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT +from danswer.document_index.vespa_constants import HIDDEN +from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS +from danswer.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE +from danswer.document_index.vespa_constants import METADATA +from danswer.document_index.vespa_constants import METADATA_SUFFIX +from danswer.document_index.vespa_constants import PRIMARY_OWNERS +from danswer.document_index.vespa_constants import RECENCY_BIAS +from danswer.document_index.vespa_constants import SEARCH_ENDPOINT +from danswer.document_index.vespa_constants import SECONDARY_OWNERS +from danswer.document_index.vespa_constants import SECTION_CONTINUATION +from danswer.document_index.vespa_constants import SEMANTIC_IDENTIFIER +from danswer.document_index.vespa_constants import SOURCE_LINKS +from danswer.document_index.vespa_constants import SOURCE_TYPE +from danswer.document_index.vespa_constants import TITLE +from danswer.document_index.vespa_constants import YQL_BASE +from danswer.search.models import IndexFilters +from danswer.search.models import InferenceChunkUncleaned +from danswer.utils.logger import setup_logger +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + +logger = setup_logger() + + +def _process_dynamic_summary( + dynamic_summary: str, max_summary_length: int = 400 +) -> list[str]: + if not dynamic_summary: + return [] + + current_length = 0 + processed_summary: list[str] = [] + for summary_section in dynamic_summary.split(""): + # if we're past the desired max length, break at the last word + if current_length + len(summary_section) >= max_summary_length: + summary_section = summary_section[: max_summary_length - current_length] + summary_section = summary_section.lstrip() # remove any leading whitespace + + # handle the case where the truncated section is either just a + # single (partial) word or if it's empty + first_space = summary_section.find(" ") + if first_space == -1: + # add ``...`` to previous section + if processed_summary: + processed_summary[-1] += "..." + break + + # handle the valid truncated section case + summary_section = summary_section.rsplit(" ", 1)[0] + if summary_section[-1] in string.punctuation: + summary_section = summary_section[:-1] + summary_section += "..." 
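+ # the truncated section now ends on a whole word with a trailing ellipsis; record it and stop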
+ processed_summary.append(summary_section) + break + + processed_summary.append(summary_section) + current_length += len(summary_section) + + return processed_summary + + +def _vespa_hit_to_inference_chunk( + hit: dict[str, Any], null_score: bool = False +) -> InferenceChunkUncleaned: + fields = cast(dict[str, Any], hit["fields"]) + + # parse fields that are stored as strings, but are really json / datetime + metadata = json.loads(fields[METADATA]) if METADATA in fields else {} + updated_at = ( + datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc) + if DOC_UPDATED_AT in fields + else None + ) + + match_highlights = _process_dynamic_summary( + # fallback to regular `content` if the `content_summary` field + # isn't present + dynamic_summary=hit["fields"].get(CONTENT_SUMMARY, hit["fields"][CONTENT]), + ) + semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "") + if not semantic_identifier: + logger.error( + f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier" + ) + + source_links = fields.get(SOURCE_LINKS, {}) + source_links_dict_unprocessed = ( + json.loads(source_links) if isinstance(source_links, str) else source_links + ) + source_links_dict = { + int(k): v + for k, v in cast(dict[str, str], source_links_dict_unprocessed).items() + } + + return InferenceChunkUncleaned( + chunk_id=fields[CHUNK_ID], + blurb=fields.get(BLURB, ""), # Unused + content=fields[CONTENT], # Includes extra title prefix and metadata suffix + source_links=source_links_dict or {0: ""}, + section_continuation=fields[SECTION_CONTINUATION], + document_id=fields[DOCUMENT_ID], + source_type=fields[SOURCE_TYPE], + title=fields.get(TITLE), + semantic_identifier=fields[SEMANTIC_IDENTIFIER], + boost=fields.get(BOOST, 1), + recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0), + score=None if null_score else hit.get("relevance", 0), + hidden=fields.get(HIDDEN, False), + primary_owners=fields.get(PRIMARY_OWNERS), + secondary_owners=fields.get(SECONDARY_OWNERS), + large_chunk_reference_ids=fields.get(LARGE_CHUNK_REFERENCE_IDS, []), + metadata=metadata, + metadata_suffix=fields.get(METADATA_SUFFIX), + match_highlights=match_highlights, + updated_at=updated_at, + ) + + +def _get_chunks_via_visit_api( + chunk_request: VespaChunkRequest, + index_name: str, + filters: IndexFilters, + field_names: list[str] | None = None, + get_large_chunks: bool = False, +) -> list[dict]: + # Constructing the URL for the Visit API + # NOTE: visit API uses the same URL as the document API, but with different params + url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) + + # build the list of fields to retrieve + field_set_list = ( + None + if not field_names + else [f"{index_name}:{field_name}" for field_name in field_names] + ) + acl_fieldset_entry = f"{index_name}:{ACCESS_CONTROL_LIST}" + if ( + field_set_list + and filters.access_control_list + and acl_fieldset_entry not in field_set_list + ): + field_set_list.append(acl_fieldset_entry) + field_set = ",".join(field_set_list) if field_set_list else None + + # build filters + selection = f"{index_name}.document_id=='{chunk_request.document_id}'" + + if chunk_request.is_capped: + selection += f" and {index_name}.chunk_id>={chunk_request.min_chunk_ind or 0}" + selection += f" and {index_name}.chunk_id<={chunk_request.max_chunk_ind}" + if not get_large_chunks: + selection += f" and {index_name}.large_chunk_reference_ids == null" + + # Setting up the selection criteria in the query parameters + params = { + # NOTE: Document Selector 
Language doesn't allow `contains`, so we can't check + # for the ACL in the selection. Instead, we have to check as a postfilter + "selection": selection, + "continuation": None, + "wantedDocumentCount": 1_000, + "fieldSet": field_set, + } + + document_chunks: list[dict] = [] + while True: + response = requests.get(url, params=params) + try: + response.raise_for_status() + except requests.HTTPError as e: + request_info = f"Headers: {response.request.headers}\nPayload: {params}" + response_info = f"Status Code: {response.status_code}\nResponse Content: {response.text}" + error_base = f"Error occurred getting chunk by Document ID {chunk_request.document_id}" + logger.error( + f"{error_base}:\n" + f"{request_info}\n" + f"{response_info}\n" + f"Exception: {e}" + ) + raise requests.HTTPError(error_base) from e + + # Check if the response contains any documents + response_data = response.json() + if "documents" in response_data: + for document in response_data["documents"]: + if filters.access_control_list: + document_acl = document["fields"].get(ACCESS_CONTROL_LIST) + if not document_acl or not any( + user_acl_entry in document_acl + for user_acl_entry in filters.access_control_list + ): + continue + document_chunks.append(document) + + # Check for continuation token to handle pagination + if "continuation" in response_data and response_data["continuation"]: + params["continuation"] = response_data["continuation"] + else: + break # Exit loop if no continuation token + + return document_chunks + + +def get_all_vespa_ids_for_document_id( + document_id: str, + index_name: str, + filters: IndexFilters | None = None, + get_large_chunks: bool = False, +) -> list[str]: + document_chunks = _get_chunks_via_visit_api( + chunk_request=VespaChunkRequest(document_id=document_id), + index_name=index_name, + filters=filters or IndexFilters(access_control_list=None), + field_names=[DOCUMENT_ID], + get_large_chunks=get_large_chunks, + ) + return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks] + + +def parallel_visit_api_retrieval( + index_name: str, + chunk_requests: list[VespaChunkRequest], + filters: IndexFilters, + get_large_chunks: bool = False, +) -> list[InferenceChunkUncleaned]: + functions_with_args: list[tuple[Callable, tuple]] = [ + ( + _get_chunks_via_visit_api, + (chunk_request, index_name, filters, get_large_chunks), + ) + for chunk_request in chunk_requests + ] + + parallel_results = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=True + ) + + # Any failures to retrieve would give a None, drop the Nones and empty lists + vespa_chunk_sets = [res for res in parallel_results if res] + + flattened_vespa_chunks = [] + for chunk_set in vespa_chunk_sets: + flattened_vespa_chunks.extend(chunk_set) + + inference_chunks = [ + _vespa_hit_to_inference_chunk(chunk, null_score=True) + for chunk in flattened_vespa_chunks + ] + + return inference_chunks + + +@retry(tries=3, delay=1, backoff=2) +def query_vespa( + query_params: Mapping[str, str | int | float] +) -> list[InferenceChunkUncleaned]: + if "query" in query_params and not cast(str, query_params["query"]).strip(): + raise ValueError("No/empty query received") + + params = dict( + **query_params, + **{ + "presentation.timing": True, + } + if LOG_VESPA_TIMING_INFORMATION + else {}, + ) + + response = requests.post( + SEARCH_ENDPOINT, + json=params, + ) + try: + response.raise_for_status() + except requests.HTTPError as e: + request_info = f"Headers: {response.request.headers}\nPayload: {params}" + response_info = ( + 
f"Status Code: {response.status_code}\n" + f"Response Content: {response.text}" + ) + error_base = "Failed to query Vespa" + logger.error( + f"{error_base}:\n" + f"{request_info}\n" + f"{response_info}\n" + f"Exception: {e}" + ) + raise requests.HTTPError(error_base) from e + + response_json: dict[str, Any] = response.json() + if LOG_VESPA_TIMING_INFORMATION: + logger.debug("Vespa timing info: %s", response_json.get("timing")) + hits = response_json["root"].get("children", []) + + for hit in hits: + if hit["fields"].get(CONTENT) is None: + identifier = hit["fields"].get("documentid") or hit["id"] + logger.error( + f"Vespa Index with Vespa ID {identifier} has no contents. " + f"This is invalid because the vector is not meaningful and keywordsearch cannot " + f"fetch this document" + ) + + filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None] + + inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits] + # Good Debugging Spot + return inference_chunks + + +def _get_chunks_via_batch_search( + index_name: str, + chunk_requests: list[VespaChunkRequest], + filters: IndexFilters, + get_large_chunks: bool = False, +) -> list[InferenceChunkUncleaned]: + if not chunk_requests: + return [] + + filters_str = build_vespa_filters(filters=filters, include_hidden=True) + + yql = ( + YQL_BASE.format(index_name=index_name) + + filters_str + + build_vespa_id_based_retrieval_yql(chunk_requests[0]) + ) + chunk_requests.pop(0) + + for request in chunk_requests: + yql += " or " + build_vespa_id_based_retrieval_yql(request) + params: dict[str, str | int | float] = { + "yql": yql, + "hits": MAX_ID_SEARCH_QUERY_SIZE, + } + + inference_chunks = query_vespa(params) + if not get_large_chunks: + inference_chunks = [ + chunk for chunk in inference_chunks if not chunk.large_chunk_reference_ids + ] + inference_chunks.sort(key=lambda chunk: chunk.chunk_id) + return inference_chunks + + +def batch_search_api_retrieval( + index_name: str, + chunk_requests: list[VespaChunkRequest], + filters: IndexFilters, + get_large_chunks: bool = False, +) -> list[InferenceChunkUncleaned]: + retrieved_chunks: list[InferenceChunkUncleaned] = [] + capped_requests: list[VespaChunkRequest] = [] + uncapped_requests: list[VespaChunkRequest] = [] + chunk_count = 0 + for request in chunk_requests: + # All requests without a chunk range are uncapped + # Uncapped requests are retrieved using the Visit API + range = request.range + if range is None: + uncapped_requests.append(request) + continue + + # If adding the range to the chunk count is greater than the + # max query size, we need to perform a retrieval to avoid hitting the limit + if chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE: + retrieved_chunks.extend( + _get_chunks_via_batch_search( + index_name=index_name, + chunk_requests=capped_requests, + filters=filters, + get_large_chunks=get_large_chunks, + ) + ) + capped_requests = [] + chunk_count = 0 + capped_requests.append(request) + chunk_count += range + + if capped_requests: + retrieved_chunks.extend( + _get_chunks_via_batch_search( + index_name=index_name, + chunk_requests=capped_requests, + filters=filters, + get_large_chunks=get_large_chunks, + ) + ) + + if uncapped_requests: + logger.debug(f"Retrieving {len(uncapped_requests)} uncapped requests") + retrieved_chunks.extend( + parallel_visit_api_retrieval( + index_name, uncapped_requests, filters, get_large_chunks + ) + ) + + return retrieved_chunks diff --git a/backend/danswer/document_index/vespa/deletion.py 
b/backend/danswer/document_index/vespa/deletion.py new file mode 100644 index 00000000000..3c8b7b97f15 --- /dev/null +++ b/backend/danswer/document_index/vespa/deletion.py @@ -0,0 +1,65 @@ +import concurrent.futures + +import httpx +from retry import retry + +from danswer.document_index.vespa.chunk_retrieval import ( + get_all_vespa_ids_for_document_id, +) +from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT +from danswer.document_index.vespa_constants import NUM_THREADS +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +CONTENT_SUMMARY = "content_summary" + + +@retry(tries=3, delay=1, backoff=2) +def _delete_vespa_doc_chunks( + document_id: str, index_name: str, http_client: httpx.Client +) -> None: + doc_chunk_ids = get_all_vespa_ids_for_document_id( + document_id=document_id, + index_name=index_name, + get_large_chunks=True, + ) + + for chunk_id in doc_chunk_ids: + try: + res = http_client.delete( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" + ) + res.raise_for_status() + except httpx.HTTPStatusError as e: + logger.error(f"Failed to delete chunk, details: {e.response.text}") + raise + + +def delete_vespa_docs( + document_ids: list[str], + index_name: str, + http_client: httpx.Client, + executor: concurrent.futures.ThreadPoolExecutor | None = None, +) -> None: + external_executor = True + + if not executor: + external_executor = False + executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) + + try: + doc_deletion_future = { + executor.submit( + _delete_vespa_doc_chunks, doc_id, index_name, http_client + ): doc_id + for doc_id in document_ids + } + for future in concurrent.futures.as_completed(doc_deletion_future): + # Will raise exception if the deletion raised an exception + future.result() + + finally: + if not external_executor: + executor.shutdown(wait=True) diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index e29d2a010fa..d07da5b06bb 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -1,105 +1,71 @@ import concurrent.futures import io -import json import os -import string +import re import time import zipfile -from collections.abc import Mapping from dataclasses import dataclass from datetime import datetime from datetime import timedelta -from datetime import timezone -from typing import Any from typing import BinaryIO from typing import cast import httpx import requests -from retry import retry -from danswer.configs.app_configs import LOG_VESPA_TIMING_INFORMATION -from danswer.configs.app_configs import VESPA_CONFIG_SERVER_HOST -from danswer.configs.app_configs import VESPA_HOST -from danswer.configs.app_configs import VESPA_PORT -from danswer.configs.app_configs import VESPA_TENANT_PORT from danswer.configs.chat_configs import DOC_TIME_DECAY -from danswer.configs.chat_configs import EDIT_KEYWORD_QUERY -from danswer.configs.chat_configs import HYBRID_ALPHA from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.configs.chat_configs import TITLE_CONTENT_RATIO -from danswer.configs.constants import ACCESS_CONTROL_LIST -from danswer.configs.constants import BLURB -from danswer.configs.constants import BOOST -from danswer.configs.constants import CHUNK_ID -from danswer.configs.constants import CONTENT -from danswer.configs.constants import DOC_UPDATED_AT -from danswer.configs.constants import DOCUMENT_ID -from danswer.configs.constants import DOCUMENT_SETS 
-from danswer.configs.constants import EMBEDDINGS -from danswer.configs.constants import HIDDEN -from danswer.configs.constants import INDEX_SEPARATOR -from danswer.configs.constants import METADATA -from danswer.configs.constants import METADATA_LIST -from danswer.configs.constants import PRIMARY_OWNERS -from danswer.configs.constants import RECENCY_BIAS -from danswer.configs.constants import SECONDARY_OWNERS -from danswer.configs.constants import SECTION_CONTINUATION -from danswer.configs.constants import SEMANTIC_IDENTIFIER -from danswer.configs.constants import SKIP_TITLE_EMBEDDING -from danswer.configs.constants import SOURCE_LINKS -from danswer.configs.constants import SOURCE_TYPE -from danswer.configs.constants import TITLE -from danswer.configs.constants import TITLE_EMBEDDING -from danswer.configs.constants import TITLE_SEPARATOR -from danswer.configs.model_configs import SEARCH_DISTANCE_CUTOFF -from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( - get_experts_stores_representations, -) -from danswer.document_index.document_index_utils import get_uuid_from_chunk +from danswer.configs.constants import KV_REINDEX_KEY from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import DocumentInsertionRecord from danswer.document_index.interfaces import UpdateRequest -from danswer.document_index.vespa.utils import remove_invalid_unicode_chars -from danswer.document_index.vespa.utils import replace_invalid_doc_id_characters +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.vespa.chunk_retrieval import batch_search_api_retrieval +from danswer.document_index.vespa.chunk_retrieval import ( + get_all_vespa_ids_for_document_id, +) +from danswer.document_index.vespa.chunk_retrieval import ( + parallel_visit_api_retrieval, +) +from danswer.document_index.vespa.chunk_retrieval import query_vespa +from danswer.document_index.vespa.deletion import delete_vespa_docs +from danswer.document_index.vespa.indexing_utils import batch_index_vespa_chunks +from danswer.document_index.vespa.indexing_utils import clean_chunk_id_copy +from danswer.document_index.vespa.indexing_utils import ( + get_existing_documents_from_chunks, +) +from danswer.document_index.vespa.shared_utils.utils import ( + replace_invalid_doc_id_characters, +) +from danswer.document_index.vespa.shared_utils.vespa_request_builders import ( + build_vespa_filters, +) +from danswer.document_index.vespa_constants import ACCESS_CONTROL_LIST +from danswer.document_index.vespa_constants import BATCH_SIZE +from danswer.document_index.vespa_constants import BOOST +from danswer.document_index.vespa_constants import CONTENT_SUMMARY +from danswer.document_index.vespa_constants import DANSWER_CHUNK_REPLACEMENT_PAT +from danswer.document_index.vespa_constants import DATE_REPLACEMENT +from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT +from danswer.document_index.vespa_constants import DOCUMENT_REPLACEMENT_PAT +from danswer.document_index.vespa_constants import DOCUMENT_SETS +from danswer.document_index.vespa_constants import HIDDEN +from danswer.document_index.vespa_constants import NUM_THREADS +from danswer.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT +from danswer.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT +from danswer.document_index.vespa_constants import VESPA_TIMEOUT +from danswer.document_index.vespa_constants import YQL_BASE +from danswer.dynamic_configs.factory import 
get_dynamic_config_store from danswer.indexing.models import DocMetadataAwareIndexChunk from danswer.search.models import IndexFilters -from danswer.search.models import InferenceChunk -from danswer.search.retrieval.search_runner import query_processing -from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation +from danswer.search.models import InferenceChunkUncleaned from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger +from shared_configs.model_server_models import Embedding logger = setup_logger() -VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM" -DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME" -DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT" -DATE_REPLACEMENT = "DATE_REPLACEMENT" - -# config server -VESPA_CONFIG_SERVER_URL = f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}" -VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2" - -# main search application -VESPA_APP_CONTAINER_URL = f"http://{VESPA_HOST}:{VESPA_PORT}" -# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd -DOCUMENT_ID_ENDPOINT = ( - f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid" -) -SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/" - -_BATCH_SIZE = 128 # Specific to Vespa -_NUM_THREADS = ( - 32 # since Vespa doesn't allow batching of inserts / updates, we use threads -) -# up from 500ms for now, since we've seen quite a few timeouts -# in the long term, we are looking to improve the performance of Vespa -# so that we can bring this back to default -_VESPA_TIMEOUT = "3s" -# Specific to Vespa, needed for highlighting matching keywords / section -CONTENT_SUMMARY = "content_summary" - @dataclass class _VespaUpdateRequest: @@ -108,592 +74,6 @@ class _VespaUpdateRequest: update_request: dict[str, dict] -@retry(tries=3, delay=1, backoff=2) -def _does_document_exist( - doc_chunk_id: str, - index_name: str, - http_client: httpx.Client, -) -> bool: - """Returns whether the document already exists and the users/group whitelists - Specifically in this case, document refers to a vespa document which is equivalent to a Danswer - chunk. 
This checks for whether the chunk exists already in the index""" - doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" - doc_fetch_response = http_client.get(doc_url) - - if doc_fetch_response.status_code == 404: - return False - - if doc_fetch_response.status_code != 200: - logger.debug(f"Failed to check for document with URL {doc_url}") - raise RuntimeError( - f"Unexpected fetch document by ID value from Vespa " - f"with error {doc_fetch_response.status_code}" - ) - return True - - -def _vespa_get_updated_at_attribute(t: datetime | None) -> int | None: - if not t: - return None - - if t.tzinfo != timezone.utc: - raise ValueError("Connectors must provide document update time in UTC") - - return int(t.timestamp()) - - -def _get_vespa_chunks_by_document_id( - document_id: str, - index_name: str, - user_access_control_list: list[str] | None = None, - min_chunk_ind: int | None = None, - max_chunk_ind: int | None = None, - field_names: list[str] | None = None, -) -> list[dict]: - # Constructing the URL for the Visit API - # NOTE: visit API uses the same URL as the document API, but with different params - url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) - - # build the list of fields to retrieve - field_set_list = ( - None - if not field_names - else [f"{index_name}:{field_name}" for field_name in field_names] - ) - acl_fieldset_entry = f"{index_name}:{ACCESS_CONTROL_LIST}" - if ( - field_set_list - and user_access_control_list - and acl_fieldset_entry not in field_set_list - ): - field_set_list.append(acl_fieldset_entry) - field_set = ",".join(field_set_list) if field_set_list else None - - # build filters - selection = f"{index_name}.document_id=='{document_id}'" - if min_chunk_ind is not None: - selection += f" and {index_name}.chunk_id>={min_chunk_ind}" - if max_chunk_ind is not None: - selection += f" and {index_name}.chunk_id<={max_chunk_ind}" - - # Setting up the selection criteria in the query parameters - params = { - # NOTE: Document Selector Language doesn't allow `contains`, so we can't check - # for the ACL in the selection. 
Instead, we have to check as a postfilter - "selection": selection, - "continuation": None, - "wantedDocumentCount": 1_000, - "fieldSet": field_set, - } - - document_chunks: list[dict] = [] - while True: - response = requests.get(url, params=params) - try: - response.raise_for_status() - except requests.HTTPError as e: - request_info = f"Headers: {response.request.headers}\nPayload: {params}" - response_info = f"Status Code: {response.status_code}\nResponse Content: {response.text}" - error_base = f"Error occurred getting chunk by Document ID {document_id}" - logger.error( - f"{error_base}:\n" - f"{request_info}\n" - f"{response_info}\n" - f"Exception: {e}" - ) - raise requests.HTTPError(error_base) from e - - # Check if the response contains any documents - response_data = response.json() - if "documents" in response_data: - for document in response_data["documents"]: - if user_access_control_list: - document_acl = document["fields"].get(ACCESS_CONTROL_LIST) - if not document_acl or not any( - user_acl_entry in document_acl - for user_acl_entry in user_access_control_list - ): - continue - document_chunks.append(document) - document_chunks.extend(response_data["documents"]) - - # Check for continuation token to handle pagination - if "continuation" in response_data and response_data["continuation"]: - params["continuation"] = response_data["continuation"] - else: - break # Exit loop if no continuation token - - return document_chunks - - -def _get_vespa_chunk_ids_by_document_id( - document_id: str, index_name: str, user_access_control_list: list[str] | None = None -) -> list[str]: - document_chunks = _get_vespa_chunks_by_document_id( - document_id=document_id, - index_name=index_name, - user_access_control_list=user_access_control_list, - field_names=[DOCUMENT_ID], - ) - return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks] - - -@retry(tries=3, delay=1, backoff=2) -def _delete_vespa_doc_chunks( - document_id: str, index_name: str, http_client: httpx.Client -) -> None: - doc_chunk_ids = _get_vespa_chunk_ids_by_document_id( - document_id=document_id, index_name=index_name - ) - - for chunk_id in doc_chunk_ids: - try: - res = http_client.delete( - f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" - ) - res.raise_for_status() - except httpx.HTTPStatusError as e: - logger.error(f"Failed to delete chunk, details: {e.response.text}") - raise - - -def _delete_vespa_docs( - document_ids: list[str], - index_name: str, - http_client: httpx.Client, - executor: concurrent.futures.ThreadPoolExecutor | None = None, -) -> None: - external_executor = True - - if not executor: - external_executor = False - executor = concurrent.futures.ThreadPoolExecutor(max_workers=_NUM_THREADS) - - try: - doc_deletion_future = { - executor.submit( - _delete_vespa_doc_chunks, doc_id, index_name, http_client - ): doc_id - for doc_id in document_ids - } - for future in concurrent.futures.as_completed(doc_deletion_future): - # Will raise exception if the deletion raised an exception - future.result() - - finally: - if not external_executor: - executor.shutdown(wait=True) - - -def _get_existing_documents_from_chunks( - chunks: list[DocMetadataAwareIndexChunk], - index_name: str, - http_client: httpx.Client, - executor: concurrent.futures.ThreadPoolExecutor | None = None, -) -> set[str]: - external_executor = True - - if not executor: - external_executor = False - executor = concurrent.futures.ThreadPoolExecutor(max_workers=_NUM_THREADS) - - document_ids: set[str] = set() - try: - 
chunk_existence_future = { - executor.submit( - _does_document_exist, - str(get_uuid_from_chunk(chunk)), - index_name, - http_client, - ): chunk - for chunk in chunks - } - for future in concurrent.futures.as_completed(chunk_existence_future): - chunk = chunk_existence_future[future] - chunk_already_existed = future.result() - if chunk_already_existed: - document_ids.add(chunk.source_document.id) - - finally: - if not external_executor: - executor.shutdown(wait=True) - - return document_ids - - -@retry(tries=3, delay=1, backoff=2) -def _index_vespa_chunk( - chunk: DocMetadataAwareIndexChunk, index_name: str, http_client: httpx.Client -) -> None: - json_header = { - "Content-Type": "application/json", - } - document = chunk.source_document - # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself - vespa_chunk_id = str(get_uuid_from_chunk(chunk)) - - embeddings = chunk.embeddings - embeddings_name_vector_map = {"full_chunk": embeddings.full_embedding} - if embeddings.mini_chunk_embeddings: - for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings): - embeddings_name_vector_map[f"mini_chunk_{ind}"] = m_c_embed - - title = document.get_title_for_document_index() - - vespa_document_fields = { - DOCUMENT_ID: document.id, - CHUNK_ID: chunk.chunk_id, - BLURB: remove_invalid_unicode_chars(chunk.blurb), - TITLE: remove_invalid_unicode_chars(title) if title else None, - SKIP_TITLE_EMBEDDING: not title, - CONTENT: remove_invalid_unicode_chars(chunk.content), - # This duplication of `content` is needed for keyword highlighting :( - CONTENT_SUMMARY: remove_invalid_unicode_chars(chunk.content), - SOURCE_TYPE: str(document.source.value), - SOURCE_LINKS: json.dumps(chunk.source_links), - SEMANTIC_IDENTIFIER: remove_invalid_unicode_chars(document.semantic_identifier), - SECTION_CONTINUATION: chunk.section_continuation, - METADATA: json.dumps(document.metadata), - # Save as a list for efficient extraction as an Attribute - METADATA_LIST: chunk.source_document.get_metadata_str_attributes(), - EMBEDDINGS: embeddings_name_vector_map, - TITLE_EMBEDDING: chunk.title_embedding, - BOOST: chunk.boost, - DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at), - PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners), - SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners), - # the only `set` vespa has is `weightedset`, so we have to give each - # element an arbitrary weight - ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()}, - DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets}, - } - - vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}" - logger.debug(f'Indexing to URL "{vespa_url}"') - res = http_client.post( - vespa_url, headers=json_header, json={"fields": vespa_document_fields} - ) - try: - res.raise_for_status() - except Exception as e: - logger.exception( - f"Failed to index document: '{document.id}'. 
Got response: '{res.text}'" - ) - raise e - - -def _batch_index_vespa_chunks( - chunks: list[DocMetadataAwareIndexChunk], - index_name: str, - http_client: httpx.Client, - executor: concurrent.futures.ThreadPoolExecutor | None = None, -) -> None: - external_executor = True - - if not executor: - external_executor = False - executor = concurrent.futures.ThreadPoolExecutor(max_workers=_NUM_THREADS) - - try: - chunk_index_future = { - executor.submit(_index_vespa_chunk, chunk, index_name, http_client): chunk - for chunk in chunks - } - for future in concurrent.futures.as_completed(chunk_index_future): - # Will raise exception if any indexing raised an exception - future.result() - - finally: - if not external_executor: - executor.shutdown(wait=True) - - -def _clear_and_index_vespa_chunks( - chunks: list[DocMetadataAwareIndexChunk], - index_name: str, -) -> set[DocumentInsertionRecord]: - """Receive a list of chunks from a batch of documents and index the chunks into Vespa along - with updating the associated permissions. Assumes that a document will not be split into - multiple chunk batches calling this function multiple times, otherwise only the last set of - chunks will be kept""" - existing_docs: set[str] = set() - - # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for - # indexing / updates / deletes since we have to make a large volume of requests. - with ( - concurrent.futures.ThreadPoolExecutor(max_workers=_NUM_THREADS) as executor, - httpx.Client(http2=True) as http_client, - ): - # Check for existing documents, existing documents need to have all of their chunks deleted - # prior to indexing as the document size (num chunks) may have shrunk - first_chunks = [chunk for chunk in chunks if chunk.chunk_id == 0] - for chunk_batch in batch_generator(first_chunks, _BATCH_SIZE): - existing_docs.update( - _get_existing_documents_from_chunks( - chunks=chunk_batch, - index_name=index_name, - http_client=http_client, - executor=executor, - ) - ) - - for doc_id_batch in batch_generator(existing_docs, _BATCH_SIZE): - _delete_vespa_docs( - document_ids=doc_id_batch, - index_name=index_name, - http_client=http_client, - executor=executor, - ) - - for chunk_batch in batch_generator(chunks, _BATCH_SIZE): - _batch_index_vespa_chunks( - chunks=chunk_batch, - index_name=index_name, - http_client=http_client, - executor=executor, - ) - - all_doc_ids = {chunk.source_document.id for chunk in chunks} - - return { - DocumentInsertionRecord( - document_id=doc_id, - already_existed=doc_id in existing_docs, - ) - for doc_id in all_doc_ids - } - - -def _build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str: - def _build_or_filters(key: str, vals: list[str] | None) -> str: - if vals is None: - return "" - - valid_vals = [val for val in vals if val] - if not key or not valid_vals: - return "" - - eq_elems = [f'{key} contains "{elem}"' for elem in valid_vals] - or_clause = " or ".join(eq_elems) - return f"({or_clause}) and " - - def _build_time_filter( - cutoff: datetime | None, - # Slightly over 3 Months, approximately 1 fiscal quarter - untimed_doc_cutoff: timedelta = timedelta(days=92), - ) -> str: - if not cutoff: - return "" - - # For Documents that don't have an updated at, filter them out for queries asking for - # very recent documents (3 months) default. 
Documents that don't have an updated at - # time are assigned 3 months for time decay value - include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff - cutoff_secs = int(cutoff.timestamp()) - - if include_untimed: - # Documents without updated_at are assigned -1 as their date - return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and " - - return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and " - - filter_str = f"!({HIDDEN}=true) and " if not include_hidden else "" - - # CAREFUL touching this one, currently there is no second ACL double-check post retrieval - if filters.access_control_list is not None: - filter_str += _build_or_filters( - ACCESS_CONTROL_LIST, filters.access_control_list - ) - - source_strs = ( - [s.value for s in filters.source_type] if filters.source_type else None - ) - filter_str += _build_or_filters(SOURCE_TYPE, source_strs) - - tag_attributes = None - tags = filters.tags - if tags: - tag_attributes = [tag.tag_key + INDEX_SEPARATOR + tag.tag_value for tag in tags] - filter_str += _build_or_filters(METADATA_LIST, tag_attributes) - - filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set) - - filter_str += _build_time_filter(filters.time_cutoff) - - return filter_str - - -def _process_dynamic_summary( - dynamic_summary: str, max_summary_length: int = 400 -) -> list[str]: - if not dynamic_summary: - return [] - - current_length = 0 - processed_summary: list[str] = [] - for summary_section in dynamic_summary.split(""): - # if we're past the desired max length, break at the last word - if current_length + len(summary_section) >= max_summary_length: - summary_section = summary_section[: max_summary_length - current_length] - summary_section = summary_section.lstrip() # remove any leading whitespace - - # handle the case where the truncated section is either just a - # single (partial) word or if it's empty - first_space = summary_section.find(" ") - if first_space == -1: - # add ``...`` to previous section - if processed_summary: - processed_summary[-1] += "..." - break - - # handle the valid truncated section case - summary_section = summary_section.rsplit(" ", 1)[0] - if summary_section[-1] in string.punctuation: - summary_section = summary_section[:-1] - summary_section += "..." - processed_summary.append(summary_section) - break - - processed_summary.append(summary_section) - current_length += len(summary_section) - - return processed_summary - - -def _vespa_hit_to_inference_chunk( - hit: dict[str, Any], null_score: bool = False -) -> InferenceChunk: - fields = cast(dict[str, Any], hit["fields"]) - - # parse fields that are stored as strings, but are really json / datetime - metadata = json.loads(fields[METADATA]) if METADATA in fields else {} - updated_at = ( - datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc) - if DOC_UPDATED_AT in fields - else None - ) - - # The highlights might include the title but this is the best way we have so far to show the highlighting - match_highlights = _process_dynamic_summary( - # fallback to regular `content` if the `content_summary` field - # isn't present - dynamic_summary=hit["fields"].get(CONTENT_SUMMARY, hit["fields"][CONTENT]), - ) - semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "") - if not semantic_identifier: - logger.error( - f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... 
has no Semantic Identifier" - ) - - # Remove the title from the first chunk as every chunk already included - # its semantic identifier for LLM - content = fields[CONTENT] - if fields[CHUNK_ID] == 0: - parts = content.split(TITLE_SEPARATOR, maxsplit=1) - content = parts[1] if len(parts) > 1 and "\n" not in parts[0] else content - - # User ran into this, not sure why this could happen, error checking here - blurb = fields.get(BLURB) - if not blurb: - logger.error(f"Chunk with id {fields.get(semantic_identifier)} ") - blurb = "" - - source_links = fields.get(SOURCE_LINKS, {}) - source_links_dict_unprocessed = ( - json.loads(source_links) if isinstance(source_links, str) else source_links - ) - source_links_dict = { - int(k): v - for k, v in cast(dict[str, str], source_links_dict_unprocessed).items() - } - - return InferenceChunk( - chunk_id=fields[CHUNK_ID], - blurb=blurb, - content=content, - source_links=source_links_dict, - section_continuation=fields[SECTION_CONTINUATION], - document_id=fields[DOCUMENT_ID], - source_type=fields[SOURCE_TYPE], - semantic_identifier=fields[SEMANTIC_IDENTIFIER], - boost=fields.get(BOOST, 1), - recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0), - score=None if null_score else hit.get("relevance", 0), - hidden=fields.get(HIDDEN, False), - primary_owners=fields.get(PRIMARY_OWNERS), - secondary_owners=fields.get(SECONDARY_OWNERS), - metadata=metadata, - match_highlights=match_highlights, - updated_at=updated_at, - ) - - -@retry(tries=3, delay=1, backoff=2) -def _query_vespa(query_params: Mapping[str, str | int | float]) -> list[InferenceChunk]: - if "query" in query_params and not cast(str, query_params["query"]).strip(): - raise ValueError("No/empty query received") - - params = dict( - **query_params, - **{ - "presentation.timing": True, - } - if LOG_VESPA_TIMING_INFORMATION - else {}, - ) - - response = requests.post( - SEARCH_ENDPOINT, - json=params, - ) - try: - response.raise_for_status() - except requests.HTTPError as e: - request_info = f"Headers: {response.request.headers}\nPayload: {params}" - response_info = ( - f"Status Code: {response.status_code}\n" - f"Response Content: {response.text}" - ) - error_base = "Failed to query Vespa" - logger.error( - f"{error_base}:\n" - f"{request_info}\n" - f"{response_info}\n" - f"Exception: {e}" - ) - raise requests.HTTPError(error_base) from e - - response_json: dict[str, Any] = response.json() - if LOG_VESPA_TIMING_INFORMATION: - logger.info("Vespa timing info: %s", response_json.get("timing")) - hits = response_json["root"].get("children", []) - - for hit in hits: - if hit["fields"].get(CONTENT) is None: - identifier = hit["fields"].get("documentid") or hit["id"] - logger.error( - f"Vespa Index with Vespa ID {identifier} has no contents. 
" - f"This is invalid because the vector is not meaningful and keywordsearch cannot " - f"fetch this document" - ) - - filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None] - - inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits] - # Good Debugging Spot - return inference_chunks - - -@retry(tries=3, delay=1, backoff=2) -def _inference_chunk_by_vespa_id(vespa_id: str, index_name: str) -> InferenceChunk: - res = requests.get( - f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_id}" - ) - res.raise_for_status() - - return _vespa_hit_to_inference_chunk(res.json()) - - def in_memory_zip_from_file_bytes(file_contents: dict[str, bytes]) -> BinaryIO: zip_buffer = io.BytesIO() with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf: @@ -712,43 +92,22 @@ def _create_document_xml_lines(doc_names: list[str | None]) -> str: return "\n".join(doc_lines) -def _clean_chunk_id_copy( - chunk: DocMetadataAwareIndexChunk, -) -> DocMetadataAwareIndexChunk: - clean_chunk = chunk.copy( - update={ - "source_document": chunk.source_document.copy( - update={ - "id": replace_invalid_doc_id_characters(chunk.source_document.id) - } - ) - } +def add_ngrams_to_schema(schema_content: str) -> str: + # Add the match blocks containing gram and gram-size to title and content fields + schema_content = re.sub( + r"(field title type string \{[^}]*indexing: summary \| index \| attribute)", + r"\1\n match {\n gram\n gram-size: 3\n }", + schema_content, + ) + schema_content = re.sub( + r"(field content type string \{[^}]*indexing: summary \| index)", + r"\1\n match {\n gram\n gram-size: 3\n }", + schema_content, ) - return clean_chunk + return schema_content class VespaIndex(DocumentIndex): - yql_base = ( - f"select " - f"documentid, " - f"{DOCUMENT_ID}, " - f"{CHUNK_ID}, " - f"{BLURB}, " - f"{CONTENT}, " - f"{SOURCE_TYPE}, " - f"{SOURCE_LINKS}, " - f"{SEMANTIC_IDENTIFIER}, " - f"{SECTION_CONTINUATION}, " - f"{BOOST}, " - f"{HIDDEN}, " - f"{DOC_UPDATED_AT}, " - f"{PRIMARY_OWNERS}, " - f"{SECONDARY_OWNERS}, " - f"{METADATA}, " - f"{CONTENT_SUMMARY} " - f"from {{index_name}} where " - ) - def __init__(self, index_name: str, secondary_index_name: str | None) -> None: self.index_name = index_name self.secondary_index_name = secondary_index_name @@ -775,6 +134,13 @@ def ensure_indices_exist( doc_lines = _create_document_xml_lines(schema_names) services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines) + kv_store = get_dynamic_config_store() + + needs_reindexing = False + try: + needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY)) + except Exception: + logger.debug("Could not load the reindexing flag. Using ngrams") with open(overrides_file, "r") as overrides_f: overrides_template = overrides_f.read() @@ -794,10 +160,10 @@ def ensure_indices_exist( with open(schema_file, "r") as schema_f: schema_template = schema_f.read() - schema = schema_template.replace( DANSWER_CHUNK_REPLACEMENT_PAT, self.index_name ).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim)) + schema = add_ngrams_to_schema(schema) if needs_reindexing else schema zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8") if self.secondary_index_name: @@ -819,16 +185,64 @@ def index( self, chunks: list[DocMetadataAwareIndexChunk], ) -> set[DocumentInsertionRecord]: + """Receive a list of chunks from a batch of documents and index the chunks into Vespa along + with updating the associated permissions. 
Assumes that a document will not be split into + multiple chunk batches calling this function multiple times, otherwise only the last set of + chunks will be kept""" # IMPORTANT: This must be done one index at a time, do not use secondary index here - cleaned_chunks = [_clean_chunk_id_copy(chunk) for chunk in chunks] - return _clear_and_index_vespa_chunks( - chunks=cleaned_chunks, index_name=self.index_name - ) + cleaned_chunks = [clean_chunk_id_copy(chunk) for chunk in chunks] + + existing_docs: set[str] = set() + + # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for + # indexing / updates / deletes since we have to make a large volume of requests. + with ( + concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor, + httpx.Client(http2=True) as http_client, + ): + # Check for existing documents, existing documents need to have all of their chunks deleted + # prior to indexing as the document size (num chunks) may have shrunk + first_chunks = [chunk for chunk in cleaned_chunks if chunk.chunk_id == 0] + for chunk_batch in batch_generator(first_chunks, BATCH_SIZE): + existing_docs.update( + get_existing_documents_from_chunks( + chunks=chunk_batch, + index_name=self.index_name, + http_client=http_client, + executor=executor, + ) + ) + + for doc_id_batch in batch_generator(existing_docs, BATCH_SIZE): + delete_vespa_docs( + document_ids=doc_id_batch, + index_name=self.index_name, + http_client=http_client, + executor=executor, + ) + + for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE): + batch_index_vespa_chunks( + chunks=chunk_batch, + index_name=self.index_name, + http_client=http_client, + executor=executor, + ) + + all_doc_ids = {chunk.source_document.id for chunk in cleaned_chunks} + + return { + DocumentInsertionRecord( + document_id=doc_id, + already_existed=doc_id in existing_docs, + ) + for doc_id in all_doc_ids + } @staticmethod def _apply_updates_batched( updates: list[_VespaUpdateRequest], - batch_size: int = _BATCH_SIZE, + batch_size: int = BATCH_SIZE, ) -> None: """Runs a batch of updates in parallel via the ThreadPoolExecutor.""" @@ -847,7 +261,7 @@ def _update_chunk( # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for # indexing / updates / deletes since we have to make a large volume of requests. 
with ( - concurrent.futures.ThreadPoolExecutor(max_workers=_NUM_THREADS) as executor, + concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor, httpx.Client(http2=True) as http_client, ): for update_batch in batch_generator(updates, batch_size): @@ -889,14 +303,14 @@ def update(self, update_requests: list[UpdateRequest]) -> None: index_names.append(self.secondary_index_name) chunk_id_start_time = time.monotonic() - with concurrent.futures.ThreadPoolExecutor( - max_workers=_NUM_THREADS - ) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: future_to_doc_chunk_ids = { executor.submit( - _get_vespa_chunk_ids_by_document_id, + get_all_vespa_ids_for_document_id, document_id=document_id, index_name=index_name, + filters=None, + get_large_chunks=True, ): (document_id, index_name) for index_name in index_names for update_request in update_requests @@ -952,7 +366,7 @@ def update(self, update_requests: list[UpdateRequest]) -> None: ) self._apply_updates_batched(processed_updates_requests) - logger.info( + logger.debug( "Finished updating Vespa documents in %.2f seconds", time.monotonic() - update_start, ) @@ -970,132 +384,48 @@ def delete(self, doc_ids: list[str]) -> None: index_names.append(self.secondary_index_name) for index_name in index_names: - _delete_vespa_docs( + delete_vespa_docs( document_ids=doc_ids, index_name=index_name, http_client=http_client ) def id_based_retrieval( self, - document_id: str, - min_chunk_ind: int | None, - max_chunk_ind: int | None, - user_access_control_list: list[str] | None = None, - ) -> list[InferenceChunk]: - document_id = replace_invalid_doc_id_characters(document_id) - - vespa_chunks = _get_vespa_chunks_by_document_id( - document_id=document_id, - index_name=self.index_name, - user_access_control_list=user_access_control_list, - min_chunk_ind=min_chunk_ind, - max_chunk_ind=max_chunk_ind, - ) - - if not vespa_chunks: - return [] - - inference_chunks = [ - _vespa_hit_to_inference_chunk(chunk, null_score=True) - for chunk in vespa_chunks - ] - inference_chunks.sort(key=lambda chunk: chunk.chunk_id) - return inference_chunks - - def keyword_retrieval( - self, - query: str, - filters: IndexFilters, - time_decay_multiplier: float, - num_to_retrieve: int = NUM_RETURNED_HITS, - offset: int = 0, - edit_keyword_query: bool = EDIT_KEYWORD_QUERY, - ) -> list[InferenceChunk]: - # IMPORTANT: THIS FUNCTION IS NOT UP TO DATE, DOES NOT WORK CORRECTLY - vespa_where_clauses = _build_vespa_filters(filters) - yql = ( - VespaIndex.yql_base.format(index_name=self.index_name) - + vespa_where_clauses - # `({defaultIndex: "content_summary"}userInput(@query))` section is - # needed for highlighting while the N-gram highlighting is broken / - # not working as desired - + '({grammar: "weakAnd"}userInput(@query) ' - + f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))' - ) - - final_query = query_processing(query) if edit_keyword_query else query - - params: dict[str, str | int] = { - "yql": yql, - "query": final_query, - "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier), - "hits": num_to_retrieve, - "offset": offset, - "ranking.profile": "keyword_search", - "timeout": _VESPA_TIMEOUT, - } - - return _query_vespa(params) - - def semantic_retrieval( - self, - query: str, - query_embedding: list[float], + chunk_requests: list[VespaChunkRequest], filters: IndexFilters, - time_decay_multiplier: float, - num_to_retrieve: int = NUM_RETURNED_HITS, - offset: int = 0, - distance_cutoff: float | None 
= SEARCH_DISTANCE_CUTOFF, - edit_keyword_query: bool = EDIT_KEYWORD_QUERY, - ) -> list[InferenceChunk]: - # IMPORTANT: THIS FUNCTION IS NOT UP TO DATE, DOES NOT WORK CORRECTLY - vespa_where_clauses = _build_vespa_filters(filters) - yql = ( - VespaIndex.yql_base.format(index_name=self.index_name) - + vespa_where_clauses - + f"(({{targetHits: {10 * num_to_retrieve}}}nearestNeighbor(embeddings, query_embedding)) " - # `({defaultIndex: "content_summary"}userInput(@query))` section is - # needed for highlighting while the N-gram highlighting is broken / - # not working as desired - + f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))' - ) - - query_keywords = ( - " ".join(remove_stop_words_and_punctuation(query)) - if edit_keyword_query - else query + batch_retrieval: bool = False, + get_large_chunks: bool = False, + ) -> list[InferenceChunkUncleaned]: + if batch_retrieval: + return batch_search_api_retrieval( + index_name=self.index_name, + chunk_requests=chunk_requests, + filters=filters, + get_large_chunks=get_large_chunks, + ) + return parallel_visit_api_retrieval( + index_name=self.index_name, + chunk_requests=chunk_requests, + filters=filters, + get_large_chunks=get_large_chunks, ) - params: dict[str, str | int] = { - "yql": yql, - "query": query_keywords, # Needed for highlighting - "input.query(query_embedding)": str(query_embedding), - "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier), - "hits": num_to_retrieve, - "offset": offset, - "ranking.profile": f"hybrid_search{len(query_embedding)}", - "timeout": _VESPA_TIMEOUT, - } - - return _query_vespa(params) - def hybrid_retrieval( self, query: str, - query_embedding: list[float], + query_embedding: Embedding, + final_keywords: list[str] | None, filters: IndexFilters, + hybrid_alpha: float, time_decay_multiplier: float, num_to_retrieve: int, offset: int = 0, - hybrid_alpha: float | None = HYBRID_ALPHA, title_content_ratio: float | None = TITLE_CONTENT_RATIO, - distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF, - edit_keyword_query: bool = EDIT_KEYWORD_QUERY, - ) -> list[InferenceChunk]: - vespa_where_clauses = _build_vespa_filters(filters) + ) -> list[InferenceChunkUncleaned]: + vespa_where_clauses = build_vespa_filters(filters) # Needs to be at least as much as the value set in Vespa schema config target_hits = max(10 * num_to_retrieve, 1000) yql = ( - VespaIndex.yql_base.format(index_name=self.index_name) + YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses + f"(({{targetHits: {target_hits}}}nearestNeighbor(embeddings, query_embedding)) " + f"or ({{targetHits: {target_hits}}}nearestNeighbor(title_embedding, query_embedding)) " @@ -1103,30 +433,26 @@ def hybrid_retrieval( + f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))' ) - query_keywords = ( - " ".join(remove_stop_words_and_punctuation(query)) - if edit_keyword_query - else query - ) + final_query = " ".join(final_keywords) if final_keywords else query + + logger.debug(f"Query YQL: {yql}") params: dict[str, str | int | float] = { "yql": yql, - "query": query_keywords, + "query": final_query, "input.query(query_embedding)": str(query_embedding), "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier), - "input.query(alpha)": hybrid_alpha - if hybrid_alpha is not None - else HYBRID_ALPHA, + "input.query(alpha)": hybrid_alpha, "input.query(title_content_ratio)": title_content_ratio if title_content_ratio is not None else TITLE_CONTENT_RATIO, "hits": num_to_retrieve, "offset": offset, "ranking.profile": 
f"hybrid_search{len(query_embedding)}", - "timeout": _VESPA_TIMEOUT, + "timeout": VESPA_TIMEOUT, } - return _query_vespa(params) + return query_vespa(params) def admin_retrieval( self, @@ -1134,10 +460,10 @@ def admin_retrieval( filters: IndexFilters, num_to_retrieve: int = NUM_RETURNED_HITS, offset: int = 0, - ) -> list[InferenceChunk]: - vespa_where_clauses = _build_vespa_filters(filters, include_hidden=True) + ) -> list[InferenceChunkUncleaned]: + vespa_where_clauses = build_vespa_filters(filters, include_hidden=True) yql = ( - VespaIndex.yql_base.format(index_name=self.index_name) + YQL_BASE.format(index_name=self.index_name) + vespa_where_clauses + '({grammar: "weakAnd"}userInput(@query) ' # `({defaultIndex: "content_summary"}userInput(@query))` section is @@ -1152,7 +478,7 @@ def admin_retrieval( "hits": num_to_retrieve, "offset": 0, "ranking.profile": "admin_search", - "timeout": _VESPA_TIMEOUT, + "timeout": VESPA_TIMEOUT, } - return _query_vespa(params) + return query_vespa(params) diff --git a/backend/danswer/document_index/vespa/indexing_utils.py b/backend/danswer/document_index/vespa/indexing_utils.py new file mode 100644 index 00000000000..1b16cfc4947 --- /dev/null +++ b/backend/danswer/document_index/vespa/indexing_utils.py @@ -0,0 +1,227 @@ +import concurrent.futures +import json +from datetime import datetime +from datetime import timezone + +import httpx +from retry import retry + +from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( + get_experts_stores_representations, +) +from danswer.document_index.document_index_utils import get_uuid_from_chunk +from danswer.document_index.vespa.shared_utils.utils import remove_invalid_unicode_chars +from danswer.document_index.vespa.shared_utils.utils import ( + replace_invalid_doc_id_characters, +) +from danswer.document_index.vespa_constants import ACCESS_CONTROL_LIST +from danswer.document_index.vespa_constants import BLURB +from danswer.document_index.vespa_constants import BOOST +from danswer.document_index.vespa_constants import CHUNK_ID +from danswer.document_index.vespa_constants import CONTENT +from danswer.document_index.vespa_constants import CONTENT_SUMMARY +from danswer.document_index.vespa_constants import DOC_UPDATED_AT +from danswer.document_index.vespa_constants import DOCUMENT_ID +from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT +from danswer.document_index.vespa_constants import DOCUMENT_SETS +from danswer.document_index.vespa_constants import EMBEDDINGS +from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS +from danswer.document_index.vespa_constants import METADATA +from danswer.document_index.vespa_constants import METADATA_LIST +from danswer.document_index.vespa_constants import METADATA_SUFFIX +from danswer.document_index.vespa_constants import NUM_THREADS +from danswer.document_index.vespa_constants import PRIMARY_OWNERS +from danswer.document_index.vespa_constants import SECONDARY_OWNERS +from danswer.document_index.vespa_constants import SECTION_CONTINUATION +from danswer.document_index.vespa_constants import SEMANTIC_IDENTIFIER +from danswer.document_index.vespa_constants import SKIP_TITLE_EMBEDDING +from danswer.document_index.vespa_constants import SOURCE_LINKS +from danswer.document_index.vespa_constants import SOURCE_TYPE +from danswer.document_index.vespa_constants import TITLE +from danswer.document_index.vespa_constants import TITLE_EMBEDDING +from danswer.indexing.models import DocMetadataAwareIndexChunk +from danswer.utils.logger 
import setup_logger + +logger = setup_logger() + + +@retry(tries=3, delay=1, backoff=2) +def _does_document_exist( + doc_chunk_id: str, + index_name: str, + http_client: httpx.Client, +) -> bool: + """Returns whether the document already exists and the users/group whitelists + Specifically in this case, document refers to a vespa document which is equivalent to a Danswer + chunk. This checks for whether the chunk exists already in the index""" + doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" + doc_fetch_response = http_client.get(doc_url) + + if doc_fetch_response.status_code == 404: + return False + + if doc_fetch_response.status_code != 200: + logger.debug(f"Failed to check for document with URL {doc_url}") + raise RuntimeError( + f"Unexpected fetch document by ID value from Vespa " + f"with error {doc_fetch_response.status_code}" + ) + return True + + +def _vespa_get_updated_at_attribute(t: datetime | None) -> int | None: + if not t: + return None + + if t.tzinfo != timezone.utc: + raise ValueError("Connectors must provide document update time in UTC") + + return int(t.timestamp()) + + +def get_existing_documents_from_chunks( + chunks: list[DocMetadataAwareIndexChunk], + index_name: str, + http_client: httpx.Client, + executor: concurrent.futures.ThreadPoolExecutor | None = None, +) -> set[str]: + external_executor = True + + if not executor: + external_executor = False + executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) + + document_ids: set[str] = set() + try: + chunk_existence_future = { + executor.submit( + _does_document_exist, + str(get_uuid_from_chunk(chunk)), + index_name, + http_client, + ): chunk + for chunk in chunks + } + for future in concurrent.futures.as_completed(chunk_existence_future): + chunk = chunk_existence_future[future] + chunk_already_existed = future.result() + if chunk_already_existed: + document_ids.add(chunk.source_document.id) + + finally: + if not external_executor: + executor.shutdown(wait=True) + + return document_ids + + +@retry(tries=3, delay=1, backoff=2) +def _index_vespa_chunk( + chunk: DocMetadataAwareIndexChunk, index_name: str, http_client: httpx.Client +) -> None: + json_header = { + "Content-Type": "application/json", + } + document = chunk.source_document + + # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself + vespa_chunk_id = str(get_uuid_from_chunk(chunk)) + embeddings = chunk.embeddings + + embeddings_name_vector_map = {"full_chunk": embeddings.full_embedding} + + if embeddings.mini_chunk_embeddings: + for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings): + embeddings_name_vector_map[f"mini_chunk_{ind}"] = m_c_embed + + title = document.get_title_for_document_index() + + vespa_document_fields = { + DOCUMENT_ID: document.id, + CHUNK_ID: chunk.chunk_id, + BLURB: remove_invalid_unicode_chars(chunk.blurb), + TITLE: remove_invalid_unicode_chars(title) if title else None, + SKIP_TITLE_EMBEDDING: not title, + # For the BM25 index, the keyword suffix is used, the vector is already generated with the more + # natural language representation of the metadata section + CONTENT: remove_invalid_unicode_chars( + f"{chunk.title_prefix}{chunk.content}{chunk.metadata_suffix_keyword}" + ), + # This duplication of `content` is needed for keyword highlighting + # Note that it's not exactly the same as the actual content + # which contains the title prefix and metadata suffix + CONTENT_SUMMARY: remove_invalid_unicode_chars(chunk.content), + SOURCE_TYPE: 
str(document.source.value), + SOURCE_LINKS: json.dumps(chunk.source_links), + SEMANTIC_IDENTIFIER: remove_invalid_unicode_chars(document.semantic_identifier), + SECTION_CONTINUATION: chunk.section_continuation, + LARGE_CHUNK_REFERENCE_IDS: chunk.large_chunk_reference_ids, + METADATA: json.dumps(document.metadata), + # Save as a list for efficient extraction as an Attribute + METADATA_LIST: chunk.source_document.get_metadata_str_attributes(), + METADATA_SUFFIX: chunk.metadata_suffix_keyword, + EMBEDDINGS: embeddings_name_vector_map, + TITLE_EMBEDDING: chunk.title_embedding, + BOOST: chunk.boost, + DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at), + PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners), + SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners), + # the only `set` vespa has is `weightedset`, so we have to give each + # element an arbitrary weight + ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()}, + DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets}, + } + + vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}" + logger.debug(f'Indexing to URL "{vespa_url}"') + res = http_client.post( + vespa_url, headers=json_header, json={"fields": vespa_document_fields} + ) + try: + res.raise_for_status() + except Exception as e: + logger.exception( + f"Failed to index document: '{document.id}'. Got response: '{res.text}'" + ) + raise e + + +def batch_index_vespa_chunks( + chunks: list[DocMetadataAwareIndexChunk], + index_name: str, + http_client: httpx.Client, + executor: concurrent.futures.ThreadPoolExecutor | None = None, +) -> None: + external_executor = True + + if not executor: + external_executor = False + executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) + + try: + chunk_index_future = { + executor.submit(_index_vespa_chunk, chunk, index_name, http_client): chunk + for chunk in chunks + } + for future in concurrent.futures.as_completed(chunk_index_future): + # Will raise exception if any indexing raised an exception + future.result() + + finally: + if not external_executor: + executor.shutdown(wait=True) + + +def clean_chunk_id_copy( + chunk: DocMetadataAwareIndexChunk, +) -> DocMetadataAwareIndexChunk: + clean_chunk = chunk.copy( + update={ + "source_document": chunk.source_document.copy( + update={ + "id": replace_invalid_doc_id_characters(chunk.source_document.id) + } + ) + } + ) + return clean_chunk diff --git a/backend/danswer/document_index/vespa/utils.py b/backend/danswer/document_index/vespa/shared_utils/utils.py similarity index 100% rename from backend/danswer/document_index/vespa/utils.py rename to backend/danswer/document_index/vespa/shared_utils/utils.py diff --git a/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py new file mode 100644 index 00000000000..65752aa09c1 --- /dev/null +++ b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py @@ -0,0 +1,96 @@ +from datetime import datetime +from datetime import timedelta +from datetime import timezone + +from danswer.configs.constants import INDEX_SEPARATOR +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.vespa_constants import ACCESS_CONTROL_LIST +from danswer.document_index.vespa_constants import CHUNK_ID +from danswer.document_index.vespa_constants import DOC_UPDATED_AT +from 
danswer.document_index.vespa_constants import DOCUMENT_ID +from danswer.document_index.vespa_constants import DOCUMENT_SETS +from danswer.document_index.vespa_constants import HIDDEN +from danswer.document_index.vespa_constants import METADATA_LIST +from danswer.document_index.vespa_constants import SOURCE_TYPE +from danswer.search.models import IndexFilters +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str: + def _build_or_filters(key: str, vals: list[str] | None) -> str: + if vals is None: + return "" + + valid_vals = [val for val in vals if val] + if not key or not valid_vals: + return "" + + eq_elems = [f'{key} contains "{elem}"' for elem in valid_vals] + or_clause = " or ".join(eq_elems) + return f"({or_clause}) and " + + def _build_time_filter( + cutoff: datetime | None, + # Slightly over 3 Months, approximately 1 fiscal quarter + untimed_doc_cutoff: timedelta = timedelta(days=92), + ) -> str: + if not cutoff: + return "" + + # For Documents that don't have an updated at, filter them out for queries asking for + # very recent documents (3 months) default. Documents that don't have an updated at + # time are assigned 3 months for time decay value + include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff + cutoff_secs = int(cutoff.timestamp()) + + if include_untimed: + # Documents without updated_at are assigned -1 as their date + return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and " + + return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and " + + filter_str = f"!({HIDDEN}=true) and " if not include_hidden else "" + + # CAREFUL touching this one, currently there is no second ACL double-check post retrieval + if filters.access_control_list is not None: + filter_str += _build_or_filters( + ACCESS_CONTROL_LIST, filters.access_control_list + ) + + source_strs = ( + [s.value for s in filters.source_type] if filters.source_type else None + ) + filter_str += _build_or_filters(SOURCE_TYPE, source_strs) + + tag_attributes = None + tags = filters.tags + if tags: + tag_attributes = [tag.tag_key + INDEX_SEPARATOR + tag.tag_value for tag in tags] + filter_str += _build_or_filters(METADATA_LIST, tag_attributes) + + filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set) + + filter_str += _build_time_filter(filters.time_cutoff) + + return filter_str + + +def build_vespa_id_based_retrieval_yql( + chunk_request: VespaChunkRequest, +) -> str: + id_based_retrieval_yql_section = ( + f'({DOCUMENT_ID} contains "{chunk_request.document_id}"' + ) + + if chunk_request.is_capped: + id_based_retrieval_yql_section += ( + f" and {CHUNK_ID} >= {chunk_request.min_chunk_ind or 0}" + ) + id_based_retrieval_yql_section += ( + f" and {CHUNK_ID} <= {chunk_request.max_chunk_ind}" + ) + + id_based_retrieval_yql_section += ")" + return id_based_retrieval_yql_section diff --git a/backend/danswer/document_index/vespa_constants.py b/backend/danswer/document_index/vespa_constants.py new file mode 100644 index 00000000000..0b8949b4264 --- /dev/null +++ b/backend/danswer/document_index/vespa_constants.py @@ -0,0 +1,85 @@ +from danswer.configs.app_configs import VESPA_CONFIG_SERVER_HOST +from danswer.configs.app_configs import VESPA_HOST +from danswer.configs.app_configs import VESPA_PORT +from danswer.configs.app_configs import VESPA_TENANT_PORT +from danswer.configs.constants import SOURCE_TYPE + +VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM" +DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME" 
+DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT" +DATE_REPLACEMENT = "DATE_REPLACEMENT" + +# config server +VESPA_CONFIG_SERVER_URL = f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}" +VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2" + +# main search application +VESPA_APP_CONTAINER_URL = f"http://{VESPA_HOST}:{VESPA_PORT}" +# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd +DOCUMENT_ID_ENDPOINT = ( + f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid" +) +SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/" + +NUM_THREADS = ( + 32 # since Vespa doesn't allow batching of inserts / updates, we use threads +) +MAX_ID_SEARCH_QUERY_SIZE = 400 +# up from 500ms for now, since we've seen quite a few timeouts +# in the long term, we are looking to improve the performance of Vespa +# so that we can bring this back to default +VESPA_TIMEOUT = "3s" +BATCH_SIZE = 128 # Specific to Vespa + + +DOCUMENT_ID = "document_id" +CHUNK_ID = "chunk_id" +BLURB = "blurb" +CONTENT = "content" +SOURCE_LINKS = "source_links" +SEMANTIC_IDENTIFIER = "semantic_identifier" +TITLE = "title" +SKIP_TITLE_EMBEDDING = "skip_title" +SECTION_CONTINUATION = "section_continuation" +EMBEDDINGS = "embeddings" +TITLE_EMBEDDING = "title_embedding" +ACCESS_CONTROL_LIST = "access_control_list" +DOCUMENT_SETS = "document_sets" +LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids" +METADATA = "metadata" +METADATA_LIST = "metadata_list" +METADATA_SUFFIX = "metadata_suffix" +BOOST = "boost" +DOC_UPDATED_AT = "doc_updated_at" # Indexed as seconds since epoch +PRIMARY_OWNERS = "primary_owners" +SECONDARY_OWNERS = "secondary_owners" +RECENCY_BIAS = "recency_bias" +HIDDEN = "hidden" + +# Specific to Vespa, needed for highlighting matching keywords / section +CONTENT_SUMMARY = "content_summary" + + +YQL_BASE = ( + f"select " + f"documentid, " + f"{DOCUMENT_ID}, " + f"{CHUNK_ID}, " + f"{BLURB}, " + f"{CONTENT}, " + f"{SOURCE_TYPE}, " + f"{SOURCE_LINKS}, " + f"{SEMANTIC_IDENTIFIER}, " + f"{TITLE}, " + f"{SECTION_CONTINUATION}, " + f"{BOOST}, " + f"{HIDDEN}, " + f"{DOC_UPDATED_AT}, " + f"{PRIMARY_OWNERS}, " + f"{SECONDARY_OWNERS}, " + f"{LARGE_CHUNK_REFERENCE_IDS}, " + f"{METADATA}, " + f"{METADATA_SUFFIX}, " + f"{CONTENT_SUMMARY} " + f"from {{index_name}} where " +) diff --git a/backend/danswer/dynamic_configs/port_configs.py b/backend/danswer/dynamic_configs/port_configs.py deleted file mode 100644 index 809c06cbf5b..00000000000 --- a/backend/danswer/dynamic_configs/port_configs.py +++ /dev/null @@ -1,115 +0,0 @@ -import json -from pathlib import Path -from typing import cast - -from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from danswer.configs.model_configs import GEN_AI_API_KEY -from danswer.configs.model_configs import GEN_AI_API_VERSION -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.db.engine import get_session_context_manager -from danswer.db.llm import fetch_existing_llm_providers -from danswer.db.llm import update_default_provider -from danswer.db.llm import upsert_llm_provider -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.factory import PostgresBackedDynamicConfigStore -from danswer.dynamic_configs.interface import ConfigNotFoundError -from 
danswer.server.manage.llm.models import LLMProviderUpsertRequest -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def read_file_system_store(directory_path: str) -> dict: - store = {} - base_path = Path(directory_path) - for file_path in base_path.iterdir(): - if file_path.is_file() and "." not in file_path.name: - with open(file_path, "r") as file: - key = file_path.stem - value = json.load(file) - - if value: - store[key] = value - return store - - -def insert_into_postgres(store_data: dict) -> None: - port_once_key = "file_store_ported" - config_store = PostgresBackedDynamicConfigStore() - try: - config_store.load(port_once_key) - return - except ConfigNotFoundError: - pass - - for key, value in store_data.items(): - config_store.store(key, value) - - config_store.store(port_once_key, True) - - -def port_filesystem_to_postgres(directory_path: str) -> None: - store_data = read_file_system_store(directory_path) - insert_into_postgres(store_data) - - -def port_api_key_to_postgres() -> None: - # can't port over custom, no longer supported - if GEN_AI_MODEL_PROVIDER == "custom": - return - - with get_session_context_manager() as db_session: - # if we already have ported things over / setup providers in the db, don't do anything - if len(fetch_existing_llm_providers(db_session)) > 0: - return - - api_key = GEN_AI_API_KEY - try: - api_key = cast( - str, get_dynamic_config_store().load(GEN_AI_API_KEY_STORAGE_KEY) - ) - except ConfigNotFoundError: - pass - - # if no API key set, don't port anything over - if not api_key: - return - - default_model_name = GEN_AI_MODEL_VERSION - if GEN_AI_MODEL_PROVIDER == "openai" and not default_model_name: - default_model_name = "gpt-4" - - # if no default model name found, don't port anything over - if not default_model_name: - return - - default_fast_model_name = FAST_GEN_AI_MODEL_VERSION - if GEN_AI_MODEL_PROVIDER == "openai" and not default_fast_model_name: - default_fast_model_name = "gpt-3.5-turbo" - - llm_provider_upsert = LLMProviderUpsertRequest( - name=GEN_AI_MODEL_PROVIDER, - provider=GEN_AI_MODEL_PROVIDER, - api_key=api_key, - api_base=GEN_AI_API_ENDPOINT, - api_version=GEN_AI_API_VERSION, - # can't port over any custom configs, since we don't know - # all the possible keys and values that could be in there - custom_config=None, - default_model_name=default_model_name, - fast_default_model_name=default_fast_model_name, - model_names=None, - ) - llm_provider = upsert_llm_provider(db_session, llm_provider_upsert) - update_default_provider(db_session, llm_provider.id) - logger.info(f"Ported over LLM provider:\n\n{llm_provider}") - - # delete the old API key - try: - get_dynamic_config_store().delete(GEN_AI_API_KEY_STORAGE_KEY) - except ConfigNotFoundError: - pass diff --git a/backend/danswer/dynamic_configs/store.py b/backend/danswer/dynamic_configs/store.py index ee4ac3d09ae..cc53da938ad 100644 --- a/backend/danswer/dynamic_configs/store.py +++ b/backend/danswer/dynamic_configs/store.py @@ -8,7 +8,7 @@ from filelock import FileLock from sqlalchemy.orm import Session -from danswer.db.engine import SessionFactory +from danswer.db.engine import get_session_factory from danswer.db.models import KVStore from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import DynamicConfigStore @@ -56,7 +56,8 @@ def delete(self, key: str) -> None: class PostgresBackedDynamicConfigStore(DynamicConfigStore): @contextmanager def get_session(self) -> Iterator[Session]: - session: Session = 
SessionFactory() + factory = get_session_factory() + session: Session = factory() try: yield session finally: diff --git a/backend/danswer/file_processing/extract_file_text.py b/backend/danswer/file_processing/extract_file_text.py index f96d4a4153d..7143b428714 100644 --- a/backend/danswer/file_processing/extract_file_text.py +++ b/backend/danswer/file_processing/extract_file_text.py @@ -190,7 +190,7 @@ def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: except Exception: logger.error("Unable to decrypt pdf") else: - logger.info("No Password available to to decrypt pdf") + logger.warning("No Password available to to decrypt pdf") if not decrypt_success: # By user request, keep files that are unreadable just so they diff --git a/backend/danswer/file_store/models.py b/backend/danswer/file_store/models.py index f26fa4ca544..d944a2fd270 100644 --- a/backend/danswer/file_store/models.py +++ b/backend/danswer/file_store/models.py @@ -1,7 +1,7 @@ import base64 from enum import Enum from typing import NotRequired -from typing import TypedDict +from typing_extensions import TypedDict # noreorder from pydantic import BaseModel diff --git a/backend/danswer/indexing/chunker.py b/backend/danswer/indexing/chunker.py index b6f59d18901..03a03f30f49 100644 --- a/backend/danswer/indexing/chunker.py +++ b/backend/danswer/indexing/chunker.py @@ -1,188 +1,304 @@ -import abc -from collections.abc import Callable -from typing import TYPE_CHECKING - from danswer.configs.app_configs import BLURB_SIZE -from danswer.configs.app_configs import CHUNK_OVERLAP +from danswer.configs.app_configs import LARGE_CHUNK_RATIO from danswer.configs.app_configs import MINI_CHUNK_SIZE +from danswer.configs.app_configs import SKIP_METADATA_IN_CHUNK from danswer.configs.constants import DocumentSource +from danswer.configs.constants import RETURN_SEPARATOR from danswer.configs.constants import SECTION_SEPARATOR -from danswer.configs.constants import TITLE_SEPARATOR from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE +from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( + get_metadata_keys_to_ignore, +) from danswer.connectors.models import Document from danswer.indexing.models import DocAwareChunk -from danswer.search.search_nlp_models import get_default_tokenizer +from danswer.natural_language_processing.utils import BaseTokenizer from danswer.utils.logger import setup_logger from danswer.utils.text_processing import shared_precompare_cleanup -if TYPE_CHECKING: - from transformers import AutoTokenizer # type:ignore +# Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps +# actually help quality at all +CHUNK_OVERLAP = 0 +# Fairly arbitrary numbers but the general concept is we don't want the title/metadata to +# overwhelm the actual contents of the chunk +# For example in a rare case, this could be 128 tokens for the 512 chunk and title prefix +# could be another 128 tokens leaving 256 for the actual contents +MAX_METADATA_PERCENTAGE = 0.25 +CHUNK_MIN_CONTENT = 256 logger = setup_logger() -ChunkFunc = Callable[[Document], list[DocAwareChunk]] +def _get_metadata_suffix_for_document_index( + metadata: dict[str, str | list[str]], include_separator: bool = False +) -> tuple[str, str]: + """ + Returns the metadata as a natural language string representation with all of the keys and values for the vector embedding + and a string of all of the values for the keyword search -def extract_blurb(text: str, blurb_size: int) -> str: - from 
llama_index.text_splitter import SentenceSplitter + For example, if we have the following metadata: + { + "author": "John Doe", + "space": "Engineering" + } + The vector embedding string should include the relation between the key and value wheres as for keyword we only want John Doe + and Engineering. The keys are repeat and much more noisy. + """ + if not metadata: + return "", "" - token_count_func = get_default_tokenizer().tokenize - blurb_splitter = SentenceSplitter( - tokenizer=token_count_func, chunk_size=blurb_size, chunk_overlap=0 - ) + metadata_str = "Metadata:\n" + metadata_values = [] + for key, value in metadata.items(): + if key in get_metadata_keys_to_ignore(): + continue - return blurb_splitter.split_text(text)[0] + value_str = ", ".join(value) if isinstance(value, list) else value + if isinstance(value, list): + metadata_values.extend(value) + else: + metadata_values.append(value) -def chunk_large_section( - section_text: str, - section_link_text: str, - document: Document, - start_chunk_id: int, - tokenizer: "AutoTokenizer", - chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE, - chunk_overlap: int = CHUNK_OVERLAP, - blurb_size: int = BLURB_SIZE, -) -> list[DocAwareChunk]: - from llama_index.text_splitter import SentenceSplitter + metadata_str += f"\t{key} - {value_str}\n" - blurb = extract_blurb(section_text, blurb_size) + metadata_semantic = metadata_str.strip() + metadata_keyword = " ".join(metadata_values) - sentence_aware_splitter = SentenceSplitter( - tokenizer=tokenizer.tokenize, chunk_size=chunk_size, chunk_overlap=chunk_overlap + if include_separator: + return RETURN_SEPARATOR + metadata_semantic, RETURN_SEPARATOR + metadata_keyword + return metadata_semantic, metadata_keyword + + +def _combine_chunks(chunks: list[DocAwareChunk], index: int) -> DocAwareChunk: + merged_chunk = DocAwareChunk( + source_document=chunks[0].source_document, + chunk_id=index, + blurb=chunks[0].blurb, + content=chunks[0].content, + source_links=chunks[0].source_links or {}, + section_continuation=(index > 0), + title_prefix=chunks[0].title_prefix, + metadata_suffix_semantic=chunks[0].metadata_suffix_semantic, + metadata_suffix_keyword=chunks[0].metadata_suffix_keyword, + large_chunk_reference_ids=[chunks[0].chunk_id], + mini_chunk_texts=None, ) - split_texts = sentence_aware_splitter.split_text(section_text) + offset = 0 + for i in range(1, len(chunks)): + merged_chunk.content += SECTION_SEPARATOR + chunks[i].content + merged_chunk.large_chunk_reference_ids.append(chunks[i].chunk_id) - chunks = [ - DocAwareChunk( - source_document=document, - chunk_id=start_chunk_id + chunk_ind, - blurb=blurb, - content=chunk_str, - source_links={0: section_link_text}, - section_continuation=(chunk_ind != 0), - ) - for chunk_ind, chunk_str in enumerate(split_texts) + offset += len(SECTION_SEPARATOR) + len(chunks[i - 1].content) + for link_offset, link_text in (chunks[i].source_links or {}).items(): + if merged_chunk.source_links is None: + merged_chunk.source_links = {} + merged_chunk.source_links[link_offset + offset] = link_text + + return merged_chunk + + +def generate_large_chunks(chunks: list[DocAwareChunk]) -> list[DocAwareChunk]: + large_chunks = [ + _combine_chunks(chunks[i : i + LARGE_CHUNK_RATIO], idx) + for idx, i in enumerate(range(0, len(chunks), LARGE_CHUNK_RATIO)) + if len(chunks[i : i + LARGE_CHUNK_RATIO]) > 1 ] - return chunks - - -def chunk_document( - document: Document, - chunk_tok_size: int = DOC_EMBEDDING_CONTEXT_SIZE, - subsection_overlap: int = CHUNK_OVERLAP, - blurb_size: int = 
BLURB_SIZE, -) -> list[DocAwareChunk]: - title = document.get_title_for_document_index() - title_prefix = title.replace("\n", " ") + TITLE_SEPARATOR if title else "" - tokenizer = get_default_tokenizer() - - chunks: list[DocAwareChunk] = [] - link_offsets: dict[int, str] = {} - chunk_text = "" - for ind, section in enumerate(document.sections): - section_text = title_prefix + section.text if ind == 0 else section.text - section_link_text = section.link or "" - - section_tok_length = len(tokenizer.tokenize(section_text)) - current_tok_length = len(tokenizer.tokenize(chunk_text)) - curr_offset_len = len(shared_precompare_cleanup(chunk_text)) - - # Large sections are considered self-contained/unique therefore they start a new chunk and are not concatenated - # at the end by other sections - if section_tok_length > chunk_tok_size: - if chunk_text: - chunks.append( - DocAwareChunk( - source_document=document, - chunk_id=len(chunks), - blurb=extract_blurb(chunk_text, blurb_size), - content=chunk_text, - source_links=link_offsets, - section_continuation=False, - ) - ) - link_offsets = {} - chunk_text = "" - - large_section_chunks = chunk_large_section( - section_text=section_text, - section_link_text=section_link_text, - document=document, - start_chunk_id=len(chunks), - tokenizer=tokenizer, - chunk_size=chunk_tok_size, - chunk_overlap=subsection_overlap, - blurb_size=blurb_size, - ) - chunks.extend(large_section_chunks) - continue + return large_chunks - # In the case where the whole section is shorter than a chunk, either adding to chunk or start a new one - if ( - current_tok_length - + len(tokenizer.tokenize(SECTION_SEPARATOR)) - + section_tok_length - <= chunk_tok_size - ): - chunk_text += ( - SECTION_SEPARATOR + section_text if chunk_text else section_text - ) - link_offsets[curr_offset_len] = section_link_text - else: - chunks.append( - DocAwareChunk( - source_document=document, - chunk_id=len(chunks), - blurb=extract_blurb(chunk_text, blurb_size), - content=chunk_text, - source_links=link_offsets, - section_continuation=False, - ) + +class Chunker: + """ + Chunks documents into smaller chunks for indexing. + """ + + def __init__( + self, + tokenizer: BaseTokenizer, + enable_multipass: bool = False, + enable_large_chunks: bool = False, + blurb_size: int = BLURB_SIZE, + include_metadata: bool = not SKIP_METADATA_IN_CHUNK, + chunk_token_limit: int = DOC_EMBEDDING_CONTEXT_SIZE, + chunk_overlap: int = CHUNK_OVERLAP, + mini_chunk_size: int = MINI_CHUNK_SIZE, + ) -> None: + from llama_index.text_splitter import SentenceSplitter + + self.include_metadata = include_metadata + self.chunk_token_limit = chunk_token_limit + self.enable_multipass = enable_multipass + self.enable_large_chunks = enable_large_chunks + self.tokenizer = tokenizer + + self.blurb_splitter = SentenceSplitter( + tokenizer=tokenizer.tokenize, + chunk_size=blurb_size, + chunk_overlap=0, + ) + + self.chunk_splitter = SentenceSplitter( + tokenizer=tokenizer.tokenize, + chunk_size=chunk_token_limit, + chunk_overlap=chunk_overlap, + ) + + self.mini_chunk_splitter = ( + SentenceSplitter( + tokenizer=tokenizer.tokenize, + chunk_size=mini_chunk_size, + chunk_overlap=0, ) - link_offsets = {0: section_link_text} - chunk_text = section_text - - # Once we hit the end, if we're still in the process of building a chunk, add what we have - # NOTE: if it's just whitespace, ignore it. 
- if chunk_text.strip(): - chunks.append( - DocAwareChunk( + if enable_multipass + else None + ) + + def _extract_blurb(self, text: str) -> str: + texts = self.blurb_splitter.split_text(text) + if not texts: + return "" + return texts[0] + + def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None: + if self.mini_chunk_splitter and chunk_text.strip(): + return self.mini_chunk_splitter.split_text(chunk_text) + return None + + def _chunk_document( + self, + document: Document, + title_prefix: str, + metadata_suffix_semantic: str, + metadata_suffix_keyword: str, + content_token_limit: int, + ) -> list[DocAwareChunk]: + """ + Loops through sections of the document, adds metadata and converts them into chunks. + """ + chunks: list[DocAwareChunk] = [] + link_offsets: dict[int, str] = {} + chunk_text = "" + + def _create_chunk( + text: str, + links: dict[int, str], + is_continuation: bool = False, + ) -> DocAwareChunk: + return DocAwareChunk( source_document=document, chunk_id=len(chunks), - blurb=extract_blurb(chunk_text, blurb_size), - content=chunk_text, - source_links=link_offsets, - section_continuation=False, + blurb=self._extract_blurb(text), + content=text, + source_links=links or {0: ""}, + section_continuation=is_continuation, + title_prefix=title_prefix, + metadata_suffix_semantic=metadata_suffix_semantic, + metadata_suffix_keyword=metadata_suffix_keyword, + mini_chunk_texts=self._get_mini_chunk_texts(text), ) - ) - return chunks + for section in document.sections: + section_text = section.text + section_link_text = section.link or "" -def split_chunk_text_into_mini_chunks( - chunk_text: str, mini_chunk_size: int = MINI_CHUNK_SIZE -) -> list[str]: - from llama_index.text_splitter import SentenceSplitter + section_token_count = len(self.tokenizer.tokenize(section_text)) - token_count_func = get_default_tokenizer().tokenize - sentence_aware_splitter = SentenceSplitter( - tokenizer=token_count_func, chunk_size=mini_chunk_size, chunk_overlap=0 - ) + # Large sections are considered self-contained/unique + # Therefore, they start a new chunk and are not concatenated + # at the end by other sections + if section_token_count > content_token_limit: + if chunk_text: + chunks.append(_create_chunk(chunk_text, link_offsets)) + link_offsets = {} + chunk_text = "" - return sentence_aware_splitter.split_text(chunk_text) + split_texts = self.chunk_splitter.split_text(section_text) + for i, split_text in enumerate(split_texts): + chunks.append( + _create_chunk( + text=split_text, + links={0: section_link_text}, + is_continuation=(i != 0), + ) + ) + continue + current_token_count = len(self.tokenizer.tokenize(chunk_text)) + current_offset = len(shared_precompare_cleanup(chunk_text)) + # In the case where the whole section is shorter than a chunk, either add + # to chunk or start a new one + next_section_tokens = ( + len(self.tokenizer.tokenize(SECTION_SEPARATOR)) + section_token_count + ) + if next_section_tokens + current_token_count <= content_token_limit: + if chunk_text: + chunk_text += SECTION_SEPARATOR + chunk_text += section_text + link_offsets[current_offset] = section_link_text + else: + chunks.append(_create_chunk(chunk_text, link_offsets)) + link_offsets = {0: section_link_text} + chunk_text = section_text -class Chunker: - @abc.abstractmethod - def chunk(self, document: Document) -> list[DocAwareChunk]: - raise NotImplementedError + # Once we hit the end, if we're still in the process of building a chunk, add what we have. 
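For reference, a minimal standalone sketch of the section-packing rule implemented in `Chunker._chunk_document` above, assuming a toy whitespace tokenizer in place of the real `BaseTokenizer`; sections that fit under the content token limit are appended with `SECTION_SEPARATOR`, anything larger flushes the current chunk and is split on its own. The sample sections and helper names are illustrative stand-ins, not Danswer APIs.

```python
# Simplified sketch of the section-packing rule in Chunker._chunk_document above.
SECTION_SEPARATOR = "\n\n"

def toy_token_count(text: str) -> int:
    # Stand-in for BaseTokenizer.tokenize(); the real code counts model tokens.
    return len(text.split())

def pack_sections(sections: list[str], content_token_limit: int) -> list[str]:
    chunks: list[str] = []
    current = ""
    for section in sections:
        section_tokens = toy_token_count(section)
        if section_tokens > content_token_limit:
            # Oversized sections flush the current chunk and are split on their own
            if current:
                chunks.append(current)
                current = ""
            chunks.append(section)  # the real code runs chunk_splitter.split_text here
            continue
        needed = toy_token_count(SECTION_SEPARATOR) + section_tokens
        if toy_token_count(current) + needed <= content_token_limit:
            current = current + SECTION_SEPARATOR + section if current else section
        else:
            chunks.append(current)
            current = section
    # mirror the trailing flush: keep leftovers, or emit one chunk for an empty doc
    if current.strip() or not chunks:
        chunks.append(current)
    return chunks

print(pack_sections(["short intro", "a much longer body section " * 3, "outro"], 8))
```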
+ # If there is only whitespace left then don't include it. If there are no chunks at all + # from the doc, we can just create a single chunk with the title. + if chunk_text.strip() or not chunks: + chunks.append( + _create_chunk( + chunk_text, + link_offsets or {0: section_link_text}, + ) + ) + # If the chunk does not have any useable content, it will not be indexed + return chunks -class DefaultChunker(Chunker): def chunk(self, document: Document) -> list[DocAwareChunk]: # Specifically for reproducing an issue with gmail if document.source == DocumentSource.GMAIL: logger.debug(f"Chunking {document.semantic_identifier}") - return chunk_document(document) + + title = self._extract_blurb(document.get_title_for_document_index() or "") + title_prefix = title + RETURN_SEPARATOR if title else "" + title_tokens = len(self.tokenizer.tokenize(title_prefix)) + + metadata_suffix_semantic = "" + metadata_suffix_keyword = "" + metadata_tokens = 0 + if self.include_metadata: + ( + metadata_suffix_semantic, + metadata_suffix_keyword, + ) = _get_metadata_suffix_for_document_index( + document.metadata, include_separator=True + ) + metadata_tokens = len(self.tokenizer.tokenize(metadata_suffix_semantic)) + + if metadata_tokens >= self.chunk_token_limit * MAX_METADATA_PERCENTAGE: + # Note: we can keep the keyword suffix even if the semantic suffix is too long to fit in the model + # context, there is no limit for the keyword component + metadata_suffix_semantic = "" + metadata_tokens = 0 + + content_token_limit = self.chunk_token_limit - title_tokens - metadata_tokens + # If there is not enough context remaining then just index the chunk with no prefix/suffix + if content_token_limit <= CHUNK_MIN_CONTENT: + content_token_limit = self.chunk_token_limit + title_prefix = "" + metadata_suffix_semantic = "" + + normal_chunks = self._chunk_document( + document, + title_prefix, + metadata_suffix_semantic, + metadata_suffix_keyword, + content_token_limit, + ) + + if self.enable_multipass and self.enable_large_chunks: + large_chunks = generate_large_chunks(normal_chunks) + normal_chunks.extend(large_chunks) + + return normal_chunks diff --git a/backend/danswer/indexing/embedder.py b/backend/danswer/indexing/embedder.py index 0b542067a89..f7d8f4e7400 100644 --- a/backend/danswer/indexing/embedder.py +++ b/backend/danswer/indexing/embedder.py @@ -3,23 +3,21 @@ from sqlalchemy.orm import Session -from danswer.configs.app_configs import ENABLE_MINI_CHUNK -from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS -from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model -from danswer.db.models import EmbeddingModel as DbEmbeddingModel from danswer.db.models import IndexModelStatus -from danswer.indexing.chunker import split_chunk_text_into_mini_chunks +from danswer.db.models import SearchSettings +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings from danswer.indexing.models import ChunkEmbedding from danswer.indexing.models import DocAwareChunk from danswer.indexing.models import IndexChunk -from danswer.search.enums import EmbedTextType -from danswer.search.search_nlp_models import EmbeddingModel -from danswer.utils.batching import batch_list +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel from danswer.utils.logger import setup_logger 
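A worked example of the token-budget arithmetic in `Chunker.chunk` above; the token counts passed in are made-up numbers, and only `MAX_METADATA_PERCENTAGE` and `CHUNK_MIN_CONTENT` mirror the constants defined earlier in this diff.

```python
# Worked example of the content token budget computed in Chunker.chunk above.
MAX_METADATA_PERCENTAGE = 0.25
CHUNK_MIN_CONTENT = 256

def content_token_budget(
    chunk_token_limit: int, title_tokens: int, metadata_tokens: int
) -> int:
    if metadata_tokens >= chunk_token_limit * MAX_METADATA_PERCENTAGE:
        # the semantic metadata suffix is dropped (the keyword suffix is kept)
        metadata_tokens = 0
    content_token_limit = chunk_token_limit - title_tokens - metadata_tokens
    if content_token_limit <= CHUNK_MIN_CONTENT:
        # too little room left: index with no title prefix / metadata suffix at all
        return chunk_token_limit
    return content_token_limit

print(content_token_budget(512, 40, 90))   # 382: title and metadata both fit
print(content_token_budget(512, 40, 128))  # 472: metadata hit the 25% cap, dropped
print(content_token_budget(512, 240, 40))  # 512: budget fell under 256, prefixes dropped
```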
+from danswer.utils.timing import log_function_time from shared_configs.configs import INDEXING_MODEL_SERVER_HOST from shared_configs.configs import INDEXING_MODEL_SERVER_PORT +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import EmbedTextType +from shared_configs.model_server_models import Embedding logger = setup_logger() @@ -32,14 +30,34 @@ def __init__( normalize: bool, query_prefix: str | None, passage_prefix: str | None, + provider_type: EmbeddingProvider | None, + api_key: str | None, ): self.model_name = model_name self.normalize = normalize self.query_prefix = query_prefix self.passage_prefix = passage_prefix + self.provider_type = provider_type + self.api_key = api_key + + self.embedding_model = EmbeddingModel( + model_name=model_name, + query_prefix=query_prefix, + passage_prefix=passage_prefix, + normalize=normalize, + api_key=api_key, + provider_type=provider_type, + # The below are globally set, this flow always uses the indexing one + server_host=INDEXING_MODEL_SERVER_HOST, + server_port=INDEXING_MODEL_SERVER_PORT, + retrim_content=True, + ) @abstractmethod - def embed_chunks(self, chunks: list[DocAwareChunk]) -> list[IndexChunk]: + def embed_chunks( + self, + chunks: list[DocAwareChunk], + ) -> list[IndexChunk]: raise NotImplementedError @@ -50,84 +68,73 @@ def __init__( normalize: bool, query_prefix: str | None, passage_prefix: str | None, + provider_type: EmbeddingProvider | None = None, + api_key: str | None = None, ): - super().__init__(model_name, normalize, query_prefix, passage_prefix) - self.max_seq_length = DOC_EMBEDDING_CONTEXT_SIZE # Currently not customizable - - self.embedding_model = EmbeddingModel( - model_name=model_name, - query_prefix=query_prefix, - passage_prefix=passage_prefix, - normalize=normalize, - # The below are globally set, this flow always uses the indexing one - server_host=INDEXING_MODEL_SERVER_HOST, - server_port=INDEXING_MODEL_SERVER_PORT, + super().__init__( + model_name, normalize, query_prefix, passage_prefix, provider_type, api_key ) + @log_function_time() def embed_chunks( self, chunks: list[DocAwareChunk], - batch_size: int = BATCH_SIZE_ENCODE_CHUNKS, - enable_mini_chunk: bool = ENABLE_MINI_CHUNK, ) -> list[IndexChunk]: - # Cache the Title embeddings to only have to do it once - title_embed_dict: dict[str, list[float]] = {} - embedded_chunks: list[IndexChunk] = [] - - # Create Mini Chunks for more precise matching of details - # Off by default with unedited settings - chunk_texts = [] - chunk_mini_chunks_count = {} - for chunk_ind, chunk in enumerate(chunks): - chunk_texts.append(chunk.content) - mini_chunk_texts = ( - split_chunk_text_into_mini_chunks(chunk.content) - if enable_mini_chunk - else [] - ) - chunk_texts.extend(mini_chunk_texts) - chunk_mini_chunks_count[chunk_ind] = 1 + len(mini_chunk_texts) - - # Batching for embedding - text_batches = batch_list(chunk_texts, batch_size) - - embeddings: list[list[float]] = [] - len_text_batches = len(text_batches) - for idx, text_batch in enumerate(text_batches, start=1): - logger.debug(f"Embedding Content Texts batch {idx} of {len_text_batches}") - # Normalize embeddings is only configured via model_configs.py, be sure to use right - # value for the set loss - embeddings.extend( - self.embedding_model.encode(text_batch, text_type=EmbedTextType.PASSAGE) - ) - - # Replace line above with the line below for easy debugging of indexing flow - # skipping the actual model - # embeddings.extend([[0.0] * 384 for _ in range(len(text_batch))]) + # All chunks at 
this point must have some non-empty content + flat_chunk_texts: list[str] = [] + large_chunks_present = False + for chunk in chunks: + if chunk.large_chunk_reference_ids: + large_chunks_present = True + chunk_text = ( + f"{chunk.title_prefix}{chunk.content}{chunk.metadata_suffix_semantic}" + ) or chunk.source_document.get_title_for_document_index() + + if not chunk_text: + # This should never happen, the document would have been dropped + # before getting to this point + raise ValueError(f"Chunk has no content: {chunk.to_short_descriptor()}") + + flat_chunk_texts.append(chunk_text) + + if chunk.mini_chunk_texts: + flat_chunk_texts.extend(chunk.mini_chunk_texts) + + embeddings = self.embedding_model.encode( + texts=flat_chunk_texts, + text_type=EmbedTextType.PASSAGE, + large_chunks_present=large_chunks_present, + ) chunk_titles = { chunk.source_document.get_title_for_document_index() for chunk in chunks } # Drop any None or empty strings + # If there is no title or the title is empty, the title embedding field will be null + # which is ok, it just won't contribute at all to the scoring. chunk_titles_list = [title for title in chunk_titles if title] - # Embed Titles in batches - title_batches = batch_list(chunk_titles_list, batch_size) - len_title_batches = len(title_batches) - for ind_batch, title_batch in enumerate(title_batches, start=1): - logger.debug(f"Embedding Titles batch {ind_batch} of {len_title_batches}") + # Cache the Title embeddings to only have to do it once + title_embed_dict: dict[str, Embedding] = {} + if chunk_titles_list: title_embeddings = self.embedding_model.encode( - title_batch, text_type=EmbedTextType.PASSAGE + chunk_titles_list, text_type=EmbedTextType.PASSAGE ) title_embed_dict.update( - {title: vector for title, vector in zip(title_batch, title_embeddings)} + { + title: vector + for title, vector in zip(chunk_titles_list, title_embeddings) + } ) # Mapping embeddings to chunks + embedded_chunks: list[IndexChunk] = [] embedding_ind_start = 0 - for chunk_ind, chunk in enumerate(chunks): - num_embeddings = chunk_mini_chunks_count[chunk_ind] + for chunk in chunks: + num_embeddings = 1 + ( + len(chunk.mini_chunk_texts) if chunk.mini_chunk_texts else 0 + ) chunk_embeddings = embeddings[ embedding_ind_start : embedding_ind_start + num_embeddings ] @@ -149,7 +156,7 @@ def embed_chunks( title_embed_dict[title] = title_embedding new_embedded_chunk = IndexChunk( - **chunk.dict(), + **chunk.model_dump(), embeddings=ChunkEmbedding( full_embedding=chunk_embeddings[0], mini_chunk_embeddings=chunk_embeddings[1:], @@ -161,23 +168,38 @@ def embed_chunks( return embedded_chunks + @classmethod + def from_db_search_settings( + cls, search_settings: SearchSettings + ) -> "DefaultIndexingEmbedder": + return cls( + model_name=search_settings.model_name, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + provider_type=search_settings.provider_type, + api_key=search_settings.api_key, + ) + -def get_embedding_model_from_db_embedding_model( +def get_embedding_model_from_search_settings( db_session: Session, index_model_status: IndexModelStatus = IndexModelStatus.PRESENT ) -> IndexingEmbedder: - db_embedding_model: DbEmbeddingModel | None + search_settings: SearchSettings | None if index_model_status == IndexModelStatus.PRESENT: - db_embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) elif index_model_status == IndexModelStatus.FUTURE: - 
db_embedding_model = get_secondary_db_embedding_model(db_session) - if not db_embedding_model: + search_settings = get_secondary_search_settings(db_session) + if not search_settings: raise RuntimeError("No secondary index configured") else: raise RuntimeError("Not supporting embedding model rollbacks") return DefaultIndexingEmbedder( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - query_prefix=db_embedding_model.query_prefix, - passage_prefix=db_embedding_model.passage_prefix, + model_name=search_settings.model_name, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + provider_type=search_settings.provider_type, + api_key=search_settings.api_key, ) diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index cb4de6017c1..de62133fc09 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -1,10 +1,14 @@ +import traceback from functools import partial -from itertools import chain from typing import Protocol +from pydantic import BaseModel +from pydantic import ConfigDict from sqlalchemy.orm import Session from danswer.access.access import get_access_for_documents +from danswer.configs.app_configs import ENABLE_MULTIPASS_INDEXING +from danswer.configs.app_configs import INDEXING_EXCEPTION_LIMIT from danswer.configs.constants import DEFAULT_BOOST from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( get_experts_stores_representations, @@ -16,26 +20,37 @@ from danswer.db.document import update_docs_updated_at from danswer.db.document import upsert_documents_complete from danswer.db.document_set import fetch_document_sets_for_documents +from danswer.db.index_attempt import create_index_attempt_error from danswer.db.models import Document as DBDocument +from danswer.db.search_settings import get_current_search_settings from danswer.db.tag import create_or_add_document_tag from danswer.db.tag import create_or_add_document_tag_list from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import DocumentMetadata from danswer.indexing.chunker import Chunker -from danswer.indexing.chunker import DefaultChunker from danswer.indexing.embedder import IndexingEmbedder from danswer.indexing.models import DocAwareChunk from danswer.indexing.models import DocMetadataAwareIndexChunk from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time +from shared_configs.enums import EmbeddingProvider logger = setup_logger() +class DocumentBatchPrepareContext(BaseModel): + updatable_docs: list[Document] + id_to_db_doc_map: dict[str, DBDocument] + model_config = ConfigDict(arbitrary_types_allowed=True) + + class IndexingPipelineProtocol(Protocol): def __call__( - self, documents: list[Document], index_attempt_metadata: IndexAttemptMetadata - ) -> tuple[int, int]: ... + self, + document_batch: list[Document], + index_attempt_metadata: IndexAttemptMetadata, + ) -> tuple[int, int]: + ... 
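As an aside, the flat-list bookkeeping in `embed_chunks` above can be illustrated with a stripped-down sketch; the list-of-floats "vectors" stand in for the real `Embedding` type and the counts are invented for the example.

```python
# Sketch of how embed_chunks above maps one flat list of embeddings back onto
# chunks and their mini-chunks. Types are simplified stand-ins, not IndexChunk.
def slice_embeddings(
    chunk_mini_counts: list[int],    # number of mini-chunks per chunk
    embeddings: list[list[float]],   # one vector per submitted text, in order
) -> list[tuple[list[float], list[list[float]]]]:
    out = []
    start = 0
    for mini_count in chunk_mini_counts:
        num = 1 + mini_count                   # full chunk text + its mini-chunks
        vectors = embeddings[start : start + num]
        out.append((vectors[0], vectors[1:]))  # (full_embedding, mini_chunk_embeddings)
        start += num
    return out

# two chunks: the first has 2 mini-chunks, the second has none
fake_vectors = [[float(i)] for i in range(4)]
print(slice_embeddings([2, 0], fake_vectors))
# [([0.0], [[1.0], [2.0]]), ([3.0], [])]
```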
def upsert_documents_in_db( @@ -109,26 +124,103 @@ def get_doc_ids_to_update( return updatable_docs -@log_function_time() -def index_doc_batch( +def index_doc_batch_with_handler( *, chunker: Chunker, embedder: IndexingEmbedder, document_index: DocumentIndex, - documents: list[Document], + document_batch: list[Document], index_attempt_metadata: IndexAttemptMetadata, + attempt_id: int | None, db_session: Session, ignore_time_skip: bool = False, ) -> tuple[int, int]: - """Takes different pieces of the indexing pipeline and applies it to a batch of documents - Note that the documents should already be batched at this point so that it does not inflate the - memory requirements""" + r = (0, 0) + try: + r = index_doc_batch( + chunker=chunker, + embedder=embedder, + document_index=document_index, + document_batch=document_batch, + index_attempt_metadata=index_attempt_metadata, + db_session=db_session, + ignore_time_skip=ignore_time_skip, + ) + except Exception as e: + if INDEXING_EXCEPTION_LIMIT == 0: + raise + + trace = traceback.format_exc() + create_index_attempt_error( + attempt_id, + batch=index_attempt_metadata.batch_num, + docs=document_batch, + exception_msg=str(e), + exception_traceback=trace, + db_session=db_session, + ) + logger.exception( + f"Indexing batch {index_attempt_metadata.batch_num} failed. msg='{e}' trace='{trace}'" + ) + + index_attempt_metadata.num_exceptions += 1 + if index_attempt_metadata.num_exceptions == INDEXING_EXCEPTION_LIMIT: + logger.warning( + f"Maximum number of exceptions for this index attempt " + f"({INDEXING_EXCEPTION_LIMIT}) has been reached. " + f"The next exception will abort the indexing attempt." + ) + elif index_attempt_metadata.num_exceptions > INDEXING_EXCEPTION_LIMIT: + logger.warning( + f"Maximum number of exceptions for this index attempt " + f"({INDEXING_EXCEPTION_LIMIT}) has been exceeded." + ) + raise RuntimeError( + f"Maximum exception limit of {INDEXING_EXCEPTION_LIMIT} exceeded." + ) + else: + pass + + return r + + +def index_doc_batch_prepare( + document_batch: list[Document], + index_attempt_metadata: IndexAttemptMetadata, + db_session: Session, + ignore_time_skip: bool = False, +) -> DocumentBatchPrepareContext | None: + documents = [] + for document in document_batch: + empty_contents = not any(section.text.strip() for section in document.sections) + if ( + (not document.title or not document.title.strip()) + and not document.semantic_identifier.strip() + and empty_contents + ): + # Skip documents that have neither title nor content + # If the document doesn't have either, then there is no useful information in it + # This is again verified later in the pipeline after chunking but at that point there should + # already be no documents that are empty. + logger.warning( + f"Skipping document with ID {document.id} as it has neither title nor content." + ) + elif ( + document.title is not None and not document.title.strip() and empty_contents + ): + # The title is explicitly empty ("" and not None) and the document is empty + # so when building the chunk text representation, it will be empty and unuseable + logger.warning( + f"Skipping document with ID {document.id} as the chunks will be empty." 
+ ) + else: + documents.append(document) + document_ids = [document.id for document in documents] - db_docs = get_documents_by_ids( + db_docs: list[DBDocument] = get_documents_by_ids( document_ids=document_ids, db_session=db_session, ) - id_to_db_doc_map = {doc.id: doc for doc in db_docs} # Skip indexing docs that don't have a newer updated at # Shortcuts the time-consuming flow on connector index retries @@ -137,7 +229,10 @@ def index_doc_batch( if not ignore_time_skip else documents ) - updatable_ids = [doc.id for doc in updatable_docs] + + # No docs to update either because the batch is empty or every doc was already indexed + if not updatable_docs: + return None # Create records in the source of truth about these documents, # does not include doc_updated_at which is also used to indicate a successful update @@ -147,15 +242,51 @@ def index_doc_batch( db_session=db_session, ) - logger.debug("Starting chunking") + id_to_db_doc_map = {doc.id: doc for doc in db_docs} + return DocumentBatchPrepareContext( + updatable_docs=updatable_docs, id_to_db_doc_map=id_to_db_doc_map + ) - # The first chunk additionally contains the Title of the Document - chunks: list[DocAwareChunk] = list( - chain(*[chunker.chunk(document=document) for document in updatable_docs]) + +@log_function_time() +def index_doc_batch( + *, + chunker: Chunker, + embedder: IndexingEmbedder, + document_index: DocumentIndex, + document_batch: list[Document], + index_attempt_metadata: IndexAttemptMetadata, + db_session: Session, + ignore_time_skip: bool = False, +) -> tuple[int, int]: + """Takes different pieces of the indexing pipeline and applies it to a batch of documents + Note that the documents should already be batched at this point so that it does not inflate the + memory requirements""" + + ctx = index_doc_batch_prepare( + document_batch=document_batch, + index_attempt_metadata=index_attempt_metadata, + ignore_time_skip=ignore_time_skip, + db_session=db_session, ) + if not ctx: + return 0, 0 + + logger.debug("Starting chunking") + chunks: list[DocAwareChunk] = [] + for document in ctx.updatable_docs: + chunks.extend(chunker.chunk(document=document)) logger.debug("Starting embedding") - chunks_with_embeddings = embedder.embed_chunks(chunks=chunks) + chunks_with_embeddings = ( + embedder.embed_chunks( + chunks=chunks, + ) + if chunks + else [] + ) + + updatable_ids = [doc.id for doc in ctx.updatable_docs] # Acquires a lock on the documents so that no other process can modify them # NOTE: don't need to acquire till here, since this is when the actual race condition @@ -181,24 +312,26 @@ def index_doc_batch( document_id_to_document_set.get(chunk.source_document.id, []) ), boost=( - id_to_db_doc_map[chunk.source_document.id].boost - if chunk.source_document.id in id_to_db_doc_map + ctx.id_to_db_doc_map[chunk.source_document.id].boost + if chunk.source_document.id in ctx.id_to_db_doc_map else DEFAULT_BOOST ), ) for chunk in chunks_with_embeddings ] - logger.debug( - f"Indexing the following chunks: {[chunk.to_short_descriptor() for chunk in chunks]}" - ) - # A document will not be spread across different batches, so all the - # documents with chunks in this set, are fully represented by the chunks - # in this set - insertion_records = document_index.index(chunks=access_aware_chunks) + logger.debug( + f"Indexing the following chunks: {[chunk.to_short_descriptor() for chunk in access_aware_chunks]}" + ) + # A document will not be spread across different batches, so all the + # documents with chunks in this set, are fully 
represented by the chunks + # in this set + insertion_records = document_index.index(chunks=access_aware_chunks) - successful_doc_ids = [record.document_id for record in insertion_records] - successful_docs = [doc for doc in updatable_docs if doc.id in successful_doc_ids] + successful_doc_ids = [record.document_id for record in insertion_records] + successful_docs = [ + doc for doc in ctx.updatable_docs if doc.id in successful_doc_ids + ] # Update the time of latest version of the doc successfully indexed ids_to_new_updated_at = {} @@ -214,7 +347,7 @@ def index_doc_batch( db_session.commit() return len([r for r in insertion_records if r.already_existed is False]), len( - chunks + access_aware_chunks ) @@ -225,15 +358,41 @@ def build_indexing_pipeline( db_session: Session, chunker: Chunker | None = None, ignore_time_skip: bool = False, + attempt_id: int | None = None, ) -> IndexingPipelineProtocol: - """Builds a pipline which takes in a list (batch) of docs and indexes them.""" - chunker = chunker or DefaultChunker() + """Builds a pipeline which takes in a list (batch) of docs and indexes them.""" + search_settings = get_current_search_settings(db_session) + multipass = ( + search_settings.multipass_indexing + if search_settings + else ENABLE_MULTIPASS_INDEXING + ) + + enable_large_chunks = ( + multipass + and + # Only local models that supports larger context are from Nomic + ( + embedder.provider_type is not None + or embedder.model_name.startswith("nomic-ai") + ) + and + # Cohere does not support larger context they recommend not going above 512 tokens + embedder.provider_type != EmbeddingProvider.COHERE + ) + + chunker = chunker or Chunker( + tokenizer=embedder.embedding_model.tokenizer, + enable_multipass=multipass, + enable_large_chunks=enable_large_chunks, + ) return partial( - index_doc_batch, + index_doc_batch_with_handler, chunker=chunker, embedder=embedder, document_index=document_index, ignore_time_skip=ignore_time_skip, + attempt_id=attempt_id, db_session=db_session, ) diff --git a/backend/danswer/indexing/models.py b/backend/danswer/indexing/models.py index 5fc32cd9afa..b23de0eb477 100644 --- a/backend/danswer/indexing/models.py +++ b/backend/danswer/indexing/models.py @@ -1,21 +1,21 @@ from typing import TYPE_CHECKING from pydantic import BaseModel +from pydantic import Field from danswer.access.models import DocumentAccess from danswer.connectors.models import Document from danswer.utils.logger import setup_logger +from shared_configs.enums import EmbeddingProvider +from shared_configs.model_server_models import Embedding if TYPE_CHECKING: - from danswer.db.models import EmbeddingModel + from danswer.db.models import SearchSettings logger = setup_logger() -Embedding = list[float] - - class ChunkEmbedding(BaseModel): full_embedding: Embedding mini_chunk_embeddings: list[Embedding] @@ -25,9 +25,8 @@ class BaseChunk(BaseModel): chunk_id: int blurb: str # The first sentence(s) of the first Section of the chunk content: str - source_links: dict[ - int, str - ] | None # Holds the link and the offsets into the raw Chunk text + # Holds the link and the offsets into the raw Chunk text + source_links: dict[int, str] | None section_continuation: bool # True if this Chunk's start is not at the start of a Section @@ -36,6 +35,20 @@ class DocAwareChunk(BaseChunk): # During inference we only have access to the document id and do not reconstruct the Document source_document: Document + # This could be an empty string if the title is too long and taking up too much of the chunk + # This does 
not mean necessarily that the document does not have a title + title_prefix: str + + # During indexing we also (optionally) build a metadata string from the metadata dict + # This is also indexed so that we can strip it out after indexing, this way it supports + # multiple iterations of metadata representation for backwards compatibility + metadata_suffix_semantic: str + metadata_suffix_keyword: str + + mini_chunk_texts: list[str] | None + + large_chunk_reference_ids: list[int] = Field(default_factory=list) + def to_short_descriptor(self) -> str: """Used when logging the identity of a chunk""" return ( @@ -72,7 +85,7 @@ def from_index_chunk( document_sets: set[str], boost: int, ) -> "DocMetadataAwareIndexChunk": - index_chunk_data = index_chunk.dict() + index_chunk_data = index_chunk.model_dump() return cls( **index_chunk_data, access=access, @@ -83,17 +96,48 @@ def from_index_chunk( class EmbeddingModelDetail(BaseModel): model_name: str - model_dim: int normalize: bool query_prefix: str | None passage_prefix: str | None + provider_type: EmbeddingProvider | None = None + api_key: str | None = None + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} + + @classmethod + def from_db_model( + cls, + search_settings: "SearchSettings", + ) -> "EmbeddingModelDetail": + return cls( + model_name=search_settings.model_name, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + provider_type=search_settings.provider_type, + api_key=search_settings.api_key, + ) + + +# Additional info needed for indexing time +class IndexingSetting(EmbeddingModelDetail): + model_dim: int + index_name: str | None + multipass_indexing: bool + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} @classmethod - def from_model(cls, embedding_model: "EmbeddingModel") -> "EmbeddingModelDetail": + def from_db_model(cls, search_settings: "SearchSettings") -> "IndexingSetting": return cls( - model_name=embedding_model.model_name, - model_dim=embedding_model.model_dim, - normalize=embedding_model.normalize, - query_prefix=embedding_model.query_prefix, - passage_prefix=embedding_model.passage_prefix, + model_name=search_settings.model_name, + model_dim=search_settings.model_dim, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + provider_type=search_settings.provider_type, + index_name=search_settings.index_name, + multipass_indexing=search_settings.multipass_indexing, ) diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index 2ce36d0746d..630c0c70229 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -1,3 +1,4 @@ +from collections.abc import Callable from collections.abc import Iterator from typing import cast from uuid import uuid4 @@ -34,8 +35,8 @@ from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping from danswer.llm.answering.stream_processing.utils import map_document_id_order from danswer.llm.interfaces import LLM -from danswer.llm.utils import get_default_llm_tokenizer from danswer.llm.utils import message_generator_to_string_generator +from danswer.natural_language_processing.utils import get_tokenizer from danswer.tools.custom.custom_tool_prompt_builder import ( build_user_message_for_custom_tool_for_non_tool_calling_llm, ) 
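A rough sketch of the large-chunk grouping behaviour behind `large_chunk_reference_ids` (see `generate_large_chunks` earlier in this diff); the value of `LARGE_CHUNK_RATIO` is assumed here since it is not shown in the diff.

```python
# Sketch of the grouping in generate_large_chunks: every LARGE_CHUNK_RATIO
# consecutive chunks are merged into one large chunk that records the ids of
# its members in large_chunk_reference_ids. The ratio value is assumed.
LARGE_CHUNK_RATIO = 4

def group_chunk_ids(chunk_ids: list[int]) -> list[list[int]]:
    groups = []
    for i in range(0, len(chunk_ids), LARGE_CHUNK_RATIO):
        window = chunk_ids[i : i + LARGE_CHUNK_RATIO]
        if len(window) > 1:  # a lone trailing chunk is not worth merging
            groups.append(window)
    return groups

print(group_chunk_ids(list(range(9))))
# [[0, 1, 2, 3], [4, 5, 6, 7]]  -- chunk 8 is left out, matching the `> 1` check
```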
@@ -89,6 +90,9 @@ def _get_answer_stream_processor( AnswerStream = Iterator[AnswerQuestionPossibleReturn | ToolCallKickoff | ToolResponse] +logger = setup_logger() + + class Answer: def __init__( self, @@ -96,6 +100,7 @@ def __init__( answer_style_config: AnswerStyleConfig, llm: LLM, prompt_config: PromptConfig, + force_use_tool: ForceUseTool, # must be the same length as `docs`. If None, all docs are considered "relevant" message_history: list[PreviousMessage] | None = None, single_message_history: str | None = None, @@ -104,14 +109,14 @@ def __init__( latest_query_files: list[InMemoryChatFile] | None = None, files: list[InMemoryChatFile] | None = None, tools: list[Tool] | None = None, - # if specified, tells the LLM to always this tool # NOTE: for native tool-calling, this is only supported by OpenAI atm, # but we only support them anyways - force_use_tool: ForceUseTool | None = None, # if set to True, then never use the LLMs provided tool-calling functonality skip_explicit_tool_calling: bool = False, # Returns the full document sections text from the search tool return_contexts: bool = False, + skip_gen_ai_answer_generation: bool = False, + is_connected: Callable[[], bool] | None = None, ) -> None: if single_message_history and message_history: raise ValueError( @@ -119,12 +124,14 @@ def __init__( ) self.question = question + self.is_connected: Callable[[], bool] | None = is_connected self.latest_query_files = latest_query_files or [] self.file_id_to_file = {file.file_id: file for file in (files or [])} self.tools = tools or [] self.force_use_tool = force_use_tool + self.skip_explicit_tool_calling = skip_explicit_tool_calling self.message_history = message_history or [] @@ -135,17 +142,21 @@ def __init__( self.prompt_config = prompt_config self.llm = llm - self.llm_tokenizer = get_default_llm_tokenizer() + self.llm_tokenizer = get_tokenizer( + provider_type=llm.config.model_provider, + model_name=llm.config.model_name, + ) self._final_prompt: list[BaseMessage] | None = None self._streamed_output: list[str] | None = None - - self._processed_stream: list[ - AnswerQuestionPossibleReturn | ToolResponse | ToolCallKickoff - ] | None = None + self._processed_stream: ( + list[AnswerQuestionPossibleReturn | ToolResponse | ToolCallKickoff] | None + ) = None self._return_contexts = return_contexts + self.skip_gen_ai_answer_generation = skip_gen_ai_answer_generation + self._is_cancelled = False def _update_prompt_builder_for_search_tool( self, prompt_builder: AnswerPromptBuilder, final_context_documents: list[LlmDoc] @@ -183,7 +194,7 @@ def _raw_output_for_explicit_tool_calling_llms( prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) tool_call_chunk: AIMessageChunk | None = None - if self.force_use_tool and self.force_use_tool.args is not None: + if self.force_use_tool.force_use and self.force_use_tool.args is not None: # if we are forcing a tool WITH args specified, we don't need to check which tools to run # / need to generate the args tool_call_chunk = AIMessageChunk( @@ -217,7 +228,7 @@ def _raw_output_for_explicit_tool_calling_llms( for message in self.llm.stream( prompt=prompt, tools=final_tool_definitions if final_tool_definitions else None, - tool_choice="required" if self.force_use_tool else None, + tool_choice="required" if self.force_use_tool.force_use else None, ): if isinstance(message, AIMessageChunk) and ( message.tool_call_chunks or message.tool_calls @@ -228,6 +239,8 @@ def _raw_output_for_explicit_tool_calling_llms( tool_call_chunk += message # type: 
ignore else: if message.content: + if self.is_cancelled: + return yield cast(str, message.content) if not tool_call_chunk: @@ -236,12 +249,26 @@ def _raw_output_for_explicit_tool_calling_llms( # if we have a tool call, we need to call the tool tool_call_requests = tool_call_chunk.tool_calls for tool_call_request in tool_call_requests: - tool = [ + known_tools_by_name = [ tool for tool in self.tools if tool.name == tool_call_request["name"] - ][0] + ] + + if not known_tools_by_name: + logger.error( + "Tool call requested with unknown name field. \n" + f"self.tools: {self.tools}" + f"tool_call_request: {tool_call_request}" + ) + if self.tools: + tool = self.tools[0] + else: + continue + else: + tool = known_tools_by_name[0] tool_args = ( self.force_use_tool.args - if self.force_use_tool and self.force_use_tool.args + if self.force_use_tool.tool_name == tool.name + and self.force_use_tool.args else tool_call_request["args"] ) @@ -259,20 +286,27 @@ def _raw_output_for_explicit_tool_calling_llms( if tool.name in {SearchTool._NAME, InternetSearchTool._NAME}: self._update_prompt_builder_for_search_tool(prompt_builder, []) elif tool.name == ImageGenerationTool._NAME: + img_urls = [ + img_generation_result["url"] + for img_generation_result in tool_runner.tool_final_result().tool_result + ] prompt_builder.update_user_prompt( build_image_generation_user_prompt( - query=self.question, + query=self.question, img_urls=img_urls ) ) yield tool_runner.tool_final_result() prompt = prompt_builder.build(tool_call_summary=tool_call_summary) - yield from message_generator_to_string_generator( + for token in message_generator_to_string_generator( self.llm.stream( prompt=prompt, tools=[tool.tool_definition() for tool in self.tools], ) - ) + ): + if self.is_cancelled: + return + yield token return @@ -282,7 +316,7 @@ def _raw_output_for_non_explicit_tool_calling_llms( prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) chosen_tool_and_args: tuple[Tool, dict] | None = None - if self.force_use_tool: + if self.force_use_tool.force_use: # if we are forcing a tool, we don't need to check which tools to run tool = next( iter( @@ -299,7 +333,7 @@ def _raw_output_for_non_explicit_tool_calling_llms( tool_args = ( self.force_use_tool.args - if self.force_use_tool.args + if self.force_use_tool.args is not None else tool.get_args_for_non_tool_calling_llm( query=self.question, history=self.message_history, @@ -341,7 +375,7 @@ def _raw_output_for_non_explicit_tool_calling_llms( else None ) - logger.info(f"Chosen tool: {chosen_tool_and_args}") + logger.notice(f"Chosen tool: {chosen_tool_and_args}") if not chosen_tool_and_args: prompt_builder.update_system_prompt( @@ -353,9 +387,13 @@ def _raw_output_for_non_explicit_tool_calling_llms( ) ) prompt = prompt_builder.build() - yield from message_generator_to_string_generator( + for token in message_generator_to_string_generator( self.llm.stream(prompt=prompt) - ) + ): + if self.is_cancelled: + return + yield token + return tool, tool_args = chosen_tool_and_args @@ -404,11 +442,17 @@ def _raw_output_for_non_explicit_tool_calling_llms( ) ) ) + final = tool_runner.tool_final_result() - yield tool_runner.tool_final_result() + yield final prompt = prompt_builder.build() - yield from message_generator_to_string_generator(self.llm.stream(prompt=prompt)) + for token in message_generator_to_string_generator( + self.llm.stream(prompt=prompt) + ): + if self.is_cancelled: + return + yield token @property def processed_streamed_output(self) -> AnswerStream: @@ -457,6 
+501,7 @@ def _process_stream( ] elif message.id == FINAL_CONTEXT_DOCUMENTS: final_context_docs = cast(list[LlmDoc], message.response) + elif ( message.id == SEARCH_DOC_CONTENT_ID and not self._return_contexts @@ -468,22 +513,23 @@ def _process_stream( # assumes all tool responses will come first, then the final answer break - process_answer_stream_fn = _get_answer_stream_processor( - context_docs=final_context_docs or [], - # if doc selection is enabled, then search_results will be None, - # so we need to use the final_context_docs - doc_id_to_rank_map=map_document_id_order( - search_results or final_context_docs or [] - ), - answer_style_configs=self.answer_style_config, - ) + if not self.skip_gen_ai_answer_generation: + process_answer_stream_fn = _get_answer_stream_processor( + context_docs=final_context_docs or [], + # if doc selection is enabled, then search_results will be None, + # so we need to use the final_context_docs + doc_id_to_rank_map=map_document_id_order( + search_results or final_context_docs or [] + ), + answer_style_configs=self.answer_style_config, + ) - def _stream() -> Iterator[str]: - if message: - yield cast(str, message) - yield from cast(Iterator[str], stream) + def _stream() -> Iterator[str]: + if message: + yield cast(str, message) + yield from cast(Iterator[str], stream) - yield from process_answer_stream_fn(_stream()) + yield from process_answer_stream_fn(_stream()) processed_stream = [] for processed_packet in _process_stream(output_generator): @@ -509,3 +555,15 @@ def citations(self) -> list[CitationInfo]: citations.append(packet) return citations + + @property + def is_cancelled(self) -> bool: + if self._is_cancelled: + return True + + if self.is_connected is not None: + if not self.is_connected(): + logger.debug("Answer stream has been cancelled") + self._is_cancelled = not self.is_connected() + + return self._is_cancelled diff --git a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py index 94ca91703ab..fb5fa9c313e 100644 --- a/backend/danswer/llm/answering/models.py +++ b/backend/danswer/llm/answering/models.py @@ -1,6 +1,5 @@ from collections.abc import Callable from collections.abc import Iterator -from typing import Any from typing import TYPE_CHECKING from langchain.schema.messages import AIMessage @@ -8,14 +7,16 @@ from langchain.schema.messages import HumanMessage from langchain.schema.messages import SystemMessage from pydantic import BaseModel +from pydantic import ConfigDict from pydantic import Field -from pydantic import root_validator +from pydantic import model_validator from danswer.chat.models import AnswerQuestionStreamReturn from danswer.configs.constants import MessageType from danswer.file_store.models import InMemoryChatFile from danswer.llm.override_models import PromptOverride from danswer.llm.utils import build_content_with_imgs +from danswer.tools.models import ToolCallFinalResult if TYPE_CHECKING: from danswer.db.models import ChatMessage @@ -32,6 +33,7 @@ class PreviousMessage(BaseModel): token_count: int message_type: MessageType files: list[InMemoryChatFile] + tool_calls: list[ToolCallFinalResult] @classmethod def from_chat_message( @@ -49,6 +51,14 @@ def from_chat_message( for file in available_files if str(file.file_id) in message_file_ids ], + tool_calls=[ + ToolCallFinalResult( + tool_name=tool_call.tool_name, + tool_args=tool_call.tool_arguments, + tool_result=tool_call.tool_result, + ) + for tool_call in chat_message.tool_calls + ], ) def to_langchain_msg(self) -> BaseMessage: @@ -82,6 
+92,16 @@ class DocumentPruningConfig(BaseModel): using_tool_message: bool = False +class ContextualPruningConfig(DocumentPruningConfig): + num_chunk_multiple: int + + @classmethod + def from_doc_pruning_config( + cls, num_chunk_multiple: int, doc_pruning_config: DocumentPruningConfig + ) -> "ContextualPruningConfig": + return cls(num_chunk_multiple=num_chunk_multiple, **doc_pruning_config.dict()) + + class CitationConfig(BaseModel): all_docs_useful: bool = False @@ -97,22 +117,19 @@ class AnswerStyleConfig(BaseModel): default_factory=DocumentPruningConfig ) - @root_validator - def check_quotes_and_citation(cls, values: dict[str, Any]) -> dict[str, Any]: - citation_config = values.get("citation_config") - quotes_config = values.get("quotes_config") - - if citation_config is None and quotes_config is None: + @model_validator(mode="after") + def check_quotes_and_citation(self) -> "AnswerStyleConfig": + if self.citation_config is None and self.quotes_config is None: raise ValueError( "One of `citation_config` or `quotes_config` must be provided" ) - if citation_config is not None and quotes_config is not None: + if self.citation_config is not None and self.quotes_config is not None: raise ValueError( "Only one of `citation_config` or `quotes_config` must be provided" ) - return values + return self class PromptConfig(BaseModel): @@ -140,6 +157,4 @@ def from_model( include_citations=model.include_citations, ) - # needed so that this can be passed into lru_cache funcs - class Config: - frozen = True + model_config = ConfigDict(frozen=True) diff --git a/backend/danswer/llm/answering/prompts/build.py b/backend/danswer/llm/answering/prompts/build.py index 7656e729246..f53d4481f6e 100644 --- a/backend/danswer/llm/answering/prompts/build.py +++ b/backend/danswer/llm/answering/prompts/build.py @@ -12,8 +12,8 @@ from danswer.llm.interfaces import LLMConfig from danswer.llm.utils import build_content_with_imgs from danswer.llm.utils import check_message_tokens -from danswer.llm.utils import get_default_llm_tokenizer from danswer.llm.utils import translate_history_to_basemessages +from danswer.natural_language_processing.utils import get_tokenizer from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT from danswer.prompts.prompt_utils import add_date_time_to_prompt from danswer.prompts.prompt_utils import drop_messages_history_overflow @@ -66,7 +66,10 @@ def __init__( self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = None self.user_message_and_token_cnt: tuple[HumanMessage, int] | None = None - llm_tokenizer = get_default_llm_tokenizer() + llm_tokenizer = get_tokenizer( + provider_type=llm_config.model_provider, + model_name=llm_config.model_name, + ) self.llm_tokenizer_encode_func = cast( Callable[[str], list[int]], llm_tokenizer.encode ) @@ -111,8 +114,24 @@ def build( final_messages_with_tokens.append(self.user_message_and_token_cnt) if tool_call_summary: - final_messages_with_tokens.append((tool_call_summary.tool_call_request, 0)) - final_messages_with_tokens.append((tool_call_summary.tool_call_result, 0)) + final_messages_with_tokens.append( + ( + tool_call_summary.tool_call_request, + check_message_tokens( + tool_call_summary.tool_call_request, + self.llm_tokenizer_encode_func, + ), + ) + ) + final_messages_with_tokens.append( + ( + tool_call_summary.tool_call_result, + check_message_tokens( + tool_call_summary.tool_call_result, + self.llm_tokenizer_encode_func, + ), + ) + ) return drop_messages_history_overflow( final_messages_with_tokens, self.max_tokens diff 
--git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py index 69f727318d0..eddae9badb4 100644 --- a/backend/danswer/llm/answering/prompts/citations_prompt.py +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -2,10 +2,10 @@ from langchain.schema.messages import SystemMessage from danswer.chat.models import LlmDoc -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS from danswer.db.models import Persona from danswer.db.persona import get_default_prompt__read_only +from danswer.db.search_settings import get_multilingual_expansion from danswer.file_store.utils import InMemoryChatFile from danswer.llm.answering.models import PromptConfig from danswer.llm.factory import get_llms_for_persona @@ -39,7 +39,7 @@ def get_prompt_tokens(prompt_config: PromptConfig) -> int: + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT + CITATION_STATEMENT_TOKEN_CNT + CITATION_REMINDER_TOKEN_CNT - + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0) + + (LANGUAGE_HINT_TOKEN_CNT if get_multilingual_expansion() else 0) + (ADDITIONAL_INFO_TOKEN_CNT if prompt_config.datetime_aware else 0) ) @@ -135,7 +135,10 @@ def build_citations_user_message( all_doc_useful: bool, history_message: str = "", ) -> HumanMessage: - task_prompt_with_reminder = build_task_prompt_reminders(prompt_config) + multilingual_expansion = get_multilingual_expansion() + task_prompt_with_reminder = build_task_prompt_reminders( + prompt=prompt_config, use_language_hint=bool(multilingual_expansion) + ) if context_docs: context_docs_str = build_complete_context_str(context_docs) diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py index b2b67c65b37..07abc4356b6 100644 --- a/backend/danswer/llm/answering/prompts/quotes_prompt.py +++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py @@ -2,8 +2,8 @@ from danswer.chat.models import LlmDoc from danswer.configs.chat_configs import LANGUAGE_HINT -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE +from danswer.db.search_settings import get_multilingual_expansion from danswer.llm.answering.models import PromptConfig from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK @@ -19,7 +19,6 @@ def _build_weak_llm_quotes_prompt( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool, ) -> HumanMessage: """Since Danswer supports a variety of LLMs, this less demanding prompt is provided as an option to use with weaker LLMs such as small version, low float precision, quantized, @@ -48,8 +47,9 @@ def _build_strong_llm_quotes_prompt( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool, ) -> HumanMessage: + use_language_hint = bool(get_multilingual_expansion()) + context_block = "" if context_docs: context_docs_str = build_complete_context_str(context_docs) @@ -79,7 +79,6 @@ def build_quotes_user_message( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), ) -> HumanMessage: prompt_builder = ( _build_weak_llm_quotes_prompt @@ -92,7 +91,6 @@ def 
build_quotes_user_message( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, ) @@ -101,7 +99,6 @@ def build_quotes_prompt( context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), ) -> HumanMessage: prompt_builder = ( _build_weak_llm_quotes_prompt @@ -114,5 +111,4 @@ def build_quotes_prompt( context_docs=context_docs, history_str=history_str, prompt=prompt, - use_language_hint=use_language_hint, ) diff --git a/backend/danswer/llm/answering/prune_and_merge.py b/backend/danswer/llm/answering/prune_and_merge.py index 3fee5266d8d..0193de1f2aa 100644 --- a/backend/danswer/llm/answering/prune_and_merge.py +++ b/backend/danswer/llm/answering/prune_and_merge.py @@ -10,12 +10,12 @@ ) from danswer.configs.constants import IGNORE_FOR_QA from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.llm.answering.models import DocumentPruningConfig +from danswer.llm.answering.models import ContextualPruningConfig from danswer.llm.answering.models import PromptConfig from danswer.llm.answering.prompts.citations_prompt import compute_max_document_tokens from danswer.llm.interfaces import LLMConfig -from danswer.llm.utils import get_default_llm_tokenizer -from danswer.llm.utils import tokenizer_trim_content +from danswer.natural_language_processing.utils import get_tokenizer +from danswer.natural_language_processing.utils import tokenizer_trim_content from danswer.prompts.prompt_utils import build_doc_context_str from danswer.search.models import InferenceChunk from danswer.search.models import InferenceSection @@ -28,6 +28,9 @@ T = TypeVar("T", bound=LlmDoc | InferenceChunk | InferenceSection) _METADATA_TOKEN_ESTIMATE = 75 +# Title and additional tokens as part of the tool message json +# this is only used to log a warning so we can be more forgiving with the buffer +_OVERCOUNT_ESTIMATE = 256 class PruningError(Exception): @@ -135,8 +138,12 @@ def _apply_pruning( is_manually_selected_docs: bool, use_sections: bool, using_tool_message: bool, + llm_config: LLMConfig, ) -> list[InferenceSection]: - llm_tokenizer = get_default_llm_tokenizer() + llm_tokenizer = get_tokenizer( + provider_type=llm_config.model_provider, + model_name=llm_config.model_name, + ) sections = deepcopy(sections) # don't modify in place # re-order docs with all the "relevant" docs at the front @@ -165,27 +172,36 @@ def _apply_pruning( ) ) - section_tokens = len(llm_tokenizer.encode(section_str)) + section_token_count = len(llm_tokenizer.encode(section_str)) # if not using sections (specifically, using Sections where each section maps exactly to the one center chunk), # truncate chunks that are way too long. This can happen if the embedding model tokenizer is different # than the LLM tokenizer if ( not is_manually_selected_docs and not use_sections - and section_tokens > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE + and section_token_count + > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE ): - logger.warning( - "Found more tokens in Section than expected, " - "likely mismatch between embedding and LLM tokenizers. Trimming content..." 
- ) + if ( + section_token_count + > DOC_EMBEDDING_CONTEXT_SIZE + + _METADATA_TOKEN_ESTIMATE + + _OVERCOUNT_ESTIMATE + ): + # If the section is just a little bit over, it is likely due to the additional tool message tokens + # no need to record this, the content will be trimmed just in case + logger.warning( + "Found more tokens in Section than expected, " + "likely mismatch between embedding and LLM tokenizers. Trimming content..." + ) section.combined_content = tokenizer_trim_content( content=section.combined_content, desired_length=DOC_EMBEDDING_CONTEXT_SIZE, tokenizer=llm_tokenizer, ) - section_tokens = DOC_EMBEDDING_CONTEXT_SIZE + section_token_count = DOC_EMBEDDING_CONTEXT_SIZE - total_tokens += section_tokens + total_tokens += section_token_count if total_tokens > token_limit: final_section_ind = ind break @@ -250,28 +266,37 @@ def prune_sections( prompt_config: PromptConfig, llm_config: LLMConfig, question: str, - document_pruning_config: DocumentPruningConfig, + contextual_pruning_config: ContextualPruningConfig, ) -> list[InferenceSection]: # Assumes the sections are score ordered with highest first if section_relevance_list is not None: assert len(sections) == len(section_relevance_list) + actual_num_chunks = ( + contextual_pruning_config.max_chunks + * contextual_pruning_config.num_chunk_multiple + if contextual_pruning_config.max_chunks + else None + ) + token_limit = _compute_limit( prompt_config=prompt_config, llm_config=llm_config, question=question, - max_chunks=document_pruning_config.max_chunks, - max_window_percentage=document_pruning_config.max_window_percentage, - max_tokens=document_pruning_config.max_tokens, - tool_token_count=document_pruning_config.tool_num_tokens, + max_chunks=actual_num_chunks, + max_window_percentage=contextual_pruning_config.max_window_percentage, + max_tokens=contextual_pruning_config.max_tokens, + tool_token_count=contextual_pruning_config.tool_num_tokens, ) + return _apply_pruning( sections=sections, section_relevance_list=section_relevance_list, token_limit=token_limit, - is_manually_selected_docs=document_pruning_config.is_manually_selected_docs, - use_sections=document_pruning_config.use_sections, # Now default True - using_tool_message=document_pruning_config.using_tool_message, + is_manually_selected_docs=contextual_pruning_config.is_manually_selected_docs, + use_sections=contextual_pruning_config.use_sections, # Now default True + using_tool_message=contextual_pruning_config.using_tool_message, + llm_config=llm_config, ) @@ -342,7 +367,7 @@ def prune_and_merge_sections( prompt_config: PromptConfig, llm_config: LLMConfig, question: str, - document_pruning_config: DocumentPruningConfig, + contextual_pruning_config: ContextualPruningConfig, ) -> list[InferenceSection]: # Assumes the sections are score ordered with highest first remaining_sections = prune_sections( @@ -351,7 +376,7 @@ def prune_and_merge_sections( prompt_config=prompt_config, llm_config=llm_config, question=question, - document_pruning_config=document_pruning_config, + contextual_pruning_config=contextual_pruning_config, ) merged_sections = _merge_sections(sections=remaining_sections) diff --git a/backend/danswer/llm/answering/stream_processing/citation_processing.py b/backend/danswer/llm/answering/stream_processing/citation_processing.py index d1d1eb0783a..de80b6f6756 100644 --- a/backend/danswer/llm/answering/stream_processing/citation_processing.py +++ b/backend/danswer/llm/answering/stream_processing/citation_processing.py @@ -125,6 +125,30 @@ def 
extract_citations_from_stream( length_to_add -= diff continue + # Handle edge case where LLM outputs citation itself + # by allowing it to generate citations on its own. + if curr_segment.startswith("[["): + match = re.match(r"\[\[(\d+)\]\]", curr_segment) + if match: + try: + doc_id = int(match.group(1)) + context_llm_doc = context_docs[doc_id - 1] + yield CitationInfo( + citation_num=target_citation_num, + document_id=context_llm_doc.document_id, + ) + except Exception as e: + logger.warning( + f"Manual LLM citation didn't properly cite documents {e}" + ) + else: + # Will continue attempt on next loops + logger.warning( + "Manual LLM citation wasn't able to close brackets" + ) + + continue + link = context_llm_doc.link # Replace the citation in the current segment @@ -162,6 +186,7 @@ def extract_citations_from_stream( + curr_segment[end + length_to_add :] ) length_to_add += len(curr_segment) - prev_length + last_citation_end = end + length_to_add if last_citation_end > 0: diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py index 10d15b7195c..74f37b85264 100644 --- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py +++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py @@ -17,7 +17,6 @@ from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT from danswer.prompts.constants import ANSWER_PAT from danswer.prompts.constants import QUOTE_PAT -from danswer.prompts.constants import UNCERTAINTY_PAT from danswer.search.models import InferenceChunk from danswer.utils.logger import setup_logger from danswer.utils.text_processing import clean_model_quote @@ -27,6 +26,7 @@ logger = setup_logger() +answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"', re.IGNORECASE) def _extract_answer_quotes_freeform( @@ -166,18 +166,15 @@ def process_answer( into an Answer and Quotes AND (2) after the complete streaming response has been received to process the model output into an Answer and Quotes.""" answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt) - if answer == UNCERTAINTY_PAT or not answer: - if answer == UNCERTAINTY_PAT: - logger.debug("Answer matched UNCERTAINTY_PAT") - else: - logger.debug("No answer extracted from raw output") + if not answer: + logger.debug("No answer extracted from raw output") return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) - logger.info(f"Answer: {answer}") + logger.notice(f"Answer: {answer}") if not quote_strings: logger.debug("No quotes extracted from raw output") return DanswerAnswer(answer=answer), DanswerQuotes(quotes=[]) - logger.info(f"All quotes (including unmatched): {quote_strings}") + logger.debug(f"All quotes (including unmatched): {quote_strings}") quotes = match_quotes_to_docs(quote_strings, docs) logger.debug(f"Final quotes: {quotes}") @@ -200,7 +197,7 @@ def _extract_quotes_from_completed_token_stream( ) -> DanswerQuotes: answer, quotes = process_answer(model_output, context_docs, is_json_prompt) if answer: - logger.info(answer) + logger.notice(answer) elif model_output: logger.warning("Answer extraction from model output failed.") @@ -227,22 +224,27 @@ def process_model_tokens( found_answer_start = False if is_json_prompt else True found_answer_end = False hold_quote = "" + for token in tokens: model_previous = model_output model_output += token - if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output): - # Note, if the token that completes the pattern has 
additional text, for example if the token is "? - # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the - # event that the model outputs the UNCERTAINTY_PAT - found_answer_start = True + if not found_answer_start: + m = answer_pattern.search(model_output) + if m: + found_answer_start = True - # Prevent heavy cases of hallucinations where model is not even providing a json until later - if is_json_prompt and len(model_output) > 40: - logger.warning("LLM did not produce json as prompted") - found_answer_end = True + # Prevent heavy cases of hallucinations where model is never providing a JSON + # We want to quickly update the user - not stream forever + if is_json_prompt and len(model_output) > 70: + logger.warning("LLM did not produce json as prompted") + found_answer_end = True + continue - continue + remaining = model_output[m.end() :] + if len(remaining) > 0: + yield DanswerAnswerPiece(answer_piece=remaining) + continue if found_answer_start and not found_answer_end: if is_json_prompt and _stream_json_answer_end(model_previous, token): diff --git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py index 90ad481d453..33b1cc24c81 100644 --- a/backend/danswer/llm/chat_llm.py +++ b/backend/danswer/llm/chat_llm.py @@ -28,7 +28,6 @@ from danswer.configs.model_configs import GEN_AI_API_ENDPOINT from danswer.configs.model_configs import GEN_AI_API_VERSION from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE -from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS from danswer.configs.model_configs import GEN_AI_TEMPERATURE from danswer.llm.interfaces import LLM from danswer.llm.interfaces import LLMConfig @@ -62,13 +61,13 @@ def _convert_litellm_message_to_langchain_message( litellm_message: litellm.Message, ) -> BaseMessage: # Extracting the basic attributes from the litellm message - content = litellm_message.content + content = litellm_message.content or "" role = litellm_message.role # Handling function calls and tool calls if present tool_calls = ( cast( - list[litellm.utils.ChatCompletionMessageToolCall], + list[litellm.ChatCompletionMessageToolCall], litellm_message.tool_calls, ) if hasattr(litellm_message, "tool_calls") @@ -87,7 +86,7 @@ def _convert_litellm_message_to_langchain_message( "args": json.loads(tool_call.function.arguments), "id": tool_call.id, } - for tool_call in tool_calls + for tool_call in (tool_calls if tool_calls else []) ], ) elif role == "system": @@ -187,21 +186,16 @@ class DefaultMultiLLM(LLM): """Uses Litellm library to allow easy configuration to use a multitude of LLMs See https://python.langchain.com/docs/integrations/chat/litellm""" - DEFAULT_MODEL_PARAMS: dict[str, Any] = { - "frequency_penalty": 0, - "presence_penalty": 0, - } - def __init__( self, api_key: str | None, timeout: int, model_provider: str, model_name: str, + max_output_tokens: int | None = None, api_base: str | None = GEN_AI_API_ENDPOINT, api_version: str | None = GEN_AI_API_VERSION, custom_llm_provider: str | None = GEN_AI_LLM_PROVIDER_TYPE, - max_output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, temperature: float = GEN_AI_TEMPERATURE, custom_config: dict[str, str] | None = None, extra_headers: dict[str, str] | None = None, @@ -214,7 +208,17 @@ def __init__( self._api_base = api_base self._api_version = api_version self._custom_llm_provider = custom_llm_provider - self._max_output_tokens = max_output_tokens + + # This can be used to store the maximum output tkoens for this model. 
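# Aside: a minimal, self-contained sketch of the streaming answer-start detection used by the
# process_model_tokens() change earlier in this diff. A precompiled regex finds the opening of the
# JSON "answer" field in the accumulated model output, and only the text after the match is
# streamed onward. The function name and the simplified closing-quote handling are illustrative,
# not the patch's actual code.
import re
from collections.abc import Iterator

# Same pattern shape the patch uses to detect the start of the JSON answer field.
ANSWER_START = re.compile(r'{\s*"answer"\s*:\s*"', re.IGNORECASE)


def stream_answer_pieces(tokens: Iterator[str]) -> Iterator[str]:
    buffer = ""
    found_start = False
    for token in tokens:
        buffer += token
        if not found_start:
            match = ANSWER_START.search(buffer)
            if match is None:
                continue
            found_start = True
            token = buffer[match.end():]  # stream whatever already followed the match
            if not token:
                continue
        end = token.find('"')  # simplified: does not handle escaped quotes
        if end != -1:
            if token[:end]:
                yield token[:end]
            return
        yield token


# Example: list(stream_answer_pieces(iter(['{"ans', 'wer": "Hi', ' there"}']))) -> ['Hi', ' there']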
+ # self._max_output_tokens = ( + # max_output_tokens + # if max_output_tokens is not None + # else get_llm_max_output_tokens( + # model_map=litellm.model_cost, + # model_name=model_name, + # model_provider=model_provider, + # ) + # ) self._custom_config = custom_config # NOTE: have to set these as environment variables for Litellm since @@ -224,42 +228,38 @@ def __init__( for k, v in custom_config.items(): os.environ[k] = v - model_kwargs = ( - DefaultMultiLLM.DEFAULT_MODEL_PARAMS if model_provider == "openai" else {} - ) + model_kwargs: dict[str, Any] = {} if extra_headers: model_kwargs.update({"extra_headers": extra_headers}) self._model_kwargs = model_kwargs - @staticmethod - def _log_prompt(prompt: LanguageModelInput) -> None: - if isinstance(prompt, list): - for ind, msg in enumerate(prompt): - if isinstance(msg, AIMessageChunk): - if msg.content: - log_msg = msg.content - elif msg.tool_call_chunks: - log_msg = "Tool Calls: " + str( - [ - { - key: value - for key, value in tool_call.items() - if key != "index" - } - for tool_call in msg.tool_call_chunks - ] - ) - else: - log_msg = "" - logger.debug(f"Message {ind}:\n{log_msg}") - else: - logger.debug(f"Message {ind}:\n{msg.content}") - if isinstance(prompt, str): - logger.debug(f"Prompt:\n{prompt}") - def log_model_configs(self) -> None: - logger.info(f"Config: {self.config}") + logger.debug(f"Config: {self.config}") + + # def _calculate_max_output_tokens(self, prompt: LanguageModelInput) -> int: + # # NOTE: This method can be used for calculating the maximum tokens for the stream, + # # but it isn't used in practice due to the computational cost of counting tokens + # # and because LLM providers automatically cut off at the maximum output. + # # The implementation is kept for potential future use or debugging purposes. 
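# Aside: the commented-out _calculate_max_output_tokens above describes the budget arithmetic:
# available output = model context window minus prompt tokens, capped by the configured maximum.
# A standalone sketch of that calculation follows; calculate_output_budget and the whitespace
# "tokenizer" in the example are illustrative stand-ins, not the project's real helpers.
from collections.abc import Callable


def calculate_output_budget(
    prompt_messages: list[str],
    max_context_tokens: int,
    configured_max_output: int,
    encode: Callable[[str], list],
) -> int:
    # Count tokens across every message in the prompt.
    input_tokens = sum(len(encode(message)) for message in prompt_messages)
    available_output_tokens = max_context_tokens - input_tokens
    # Never return a negative budget if the prompt already fills the context window.
    return max(0, min(configured_max_output, available_output_tokens))


# Example: calculate_output_budget(["hello world"], max_context_tokens=4096,
#                                  configured_max_output=1024, encode=str.split) -> 1024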
+ + # # Get max input tokens for the model + # max_context_tokens = get_max_input_tokens( + # model_name=self.config.model_name, model_provider=self.config.model_provider + # ) + + # llm_tokenizer = get_tokenizer( + # model_name=self.config.model_name, + # provider_type=self.config.model_provider, + # ) + # # Calculate tokens in the input prompt + # input_tokens = sum(len(llm_tokenizer.encode(str(m))) for m in prompt) + + # # Calculate available tokens for output + # available_output_tokens = max_context_tokens - input_tokens + + # # Return the lesser of available tokens or configured max + # return min(self._max_output_tokens, available_output_tokens) def _completion( self, @@ -292,8 +292,11 @@ def _completion( stream=stream, # model params temperature=self._temperature, - max_tokens=self._max_output_tokens, timeout=self._timeout, + # For now, we don't support parallel tool calls + # NOTE: we can't pass this in if tools are not specified + # or else OpenAI throws an error + **({"parallel_tool_calls": False} if tools else {}), **self._model_kwargs, ) except Exception as e: @@ -311,7 +314,7 @@ def config(self) -> LLMConfig: api_version=self._api_version, ) - def invoke( + def _invoke_implementation( self, prompt: LanguageModelInput, tools: list[dict] | None = None, @@ -319,16 +322,17 @@ def invoke( ) -> BaseMessage: if LOG_DANSWER_MODEL_INTERACTIONS: self.log_model_configs() - self._log_prompt(prompt) response = cast( litellm.ModelResponse, self._completion(prompt, tools, tool_choice, False) ) - return _convert_litellm_message_to_langchain_message( - response.choices[0].message - ) + choice = response.choices[0] + if hasattr(choice, "message"): + return _convert_litellm_message_to_langchain_message(choice.message) + else: + raise ValueError("Unexpected response choice type") - def stream( + def _stream_implementation( self, prompt: LanguageModelInput, tools: list[dict] | None = None, @@ -336,14 +340,16 @@ def stream( ) -> Iterator[BaseMessage]: if LOG_DANSWER_MODEL_INTERACTIONS: self.log_model_configs() - self._log_prompt(prompt) if DISABLE_LITELLM_STREAMING: yield self.invoke(prompt) return output = None - response = self._completion(prompt, tools, tool_choice, True) + response = cast( + litellm.CustomStreamWrapper, + self._completion(prompt, tools, tool_choice, True), + ) try: for part in response: if len(part["choices"]) == 0: diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py index 2c4c029aa2d..967e014a903 100644 --- a/backend/danswer/llm/custom_llm.py +++ b/backend/danswer/llm/custom_llm.py @@ -8,7 +8,7 @@ from requests import Timeout from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS +from danswer.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS from danswer.llm.interfaces import LLM from danswer.llm.interfaces import ToolChoiceOptions from danswer.llm.utils import convert_lm_input_to_basic_string @@ -38,7 +38,7 @@ def __init__( api_key: str | None, timeout: int, endpoint: str | None = GEN_AI_API_ENDPOINT, - max_output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, + max_output_tokens: int = GEN_AI_NUM_RESERVED_OUTPUT_TOKENS, ): if not endpoint: raise ValueError( @@ -76,7 +76,7 @@ def _execute(self, input: LanguageModelInput) -> AIMessage: def log_model_configs(self) -> None: logger.debug(f"Custom model at: {self._endpoint}") - def invoke( + def _invoke_implementation( self, prompt: LanguageModelInput, tools: list[dict] | None = None, @@ -84,7 +84,7 @@ def invoke( ) -> 
BaseMessage: return self._execute(prompt) - def stream( + def _stream_implementation( self, prompt: LanguageModelInput, tools: list[dict] | None = None, diff --git a/backend/danswer/llm/interfaces.py b/backend/danswer/llm/interfaces.py index e876403c421..5e39792c393 100644 --- a/backend/danswer/llm/interfaces.py +++ b/backend/danswer/llm/interfaces.py @@ -3,9 +3,12 @@ from typing import Literal from langchain.schema.language_model import LanguageModelInput +from langchain_core.messages import AIMessageChunk from langchain_core.messages import BaseMessage from pydantic import BaseModel +from danswer.configs.app_configs import DISABLE_GENERATIVE_AI +from danswer.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS from danswer.utils.logger import setup_logger @@ -18,9 +21,38 @@ class LLMConfig(BaseModel): model_provider: str model_name: str temperature: float - api_key: str | None - api_base: str | None - api_version: str | None + api_key: str | None = None + api_base: str | None = None + api_version: str | None = None + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} + + +def log_prompt(prompt: LanguageModelInput) -> None: + if isinstance(prompt, list): + for ind, msg in enumerate(prompt): + if isinstance(msg, AIMessageChunk): + if msg.content: + log_msg = msg.content + elif msg.tool_call_chunks: + log_msg = "Tool Calls: " + str( + [ + { + key: value + for key, value in tool_call.items() + if key != "index" + } + for tool_call in msg.tool_call_chunks + ] + ) + else: + log_msg = "" + logger.debug(f"Message {ind}:\n{log_msg}") + else: + logger.debug(f"Message {ind}:\n{msg.content}") + if isinstance(prompt, str): + logger.debug(f"Prompt:\n{prompt}") class LLM(abc.ABC): @@ -45,20 +77,48 @@ def config(self) -> LLMConfig: def log_model_configs(self) -> None: raise NotImplementedError - @abc.abstractmethod + def _precall(self, prompt: LanguageModelInput) -> None: + if DISABLE_GENERATIVE_AI: + raise Exception("Generative AI is disabled") + if LOG_DANSWER_MODEL_INTERACTIONS: + log_prompt(prompt) + def invoke( self, prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, ) -> BaseMessage: - raise NotImplementedError + self._precall(prompt) + # TODO add a postcall to log model outputs independent of concrete class + # implementation + return self._invoke_implementation(prompt, tools, tool_choice) @abc.abstractmethod + def _invoke_implementation( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> BaseMessage: + raise NotImplementedError + def stream( self, prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + ) -> Iterator[BaseMessage]: + self._precall(prompt) + # TODO add a postcall to log model outputs independent of concrete class + # implementation + return self._stream_implementation(prompt, tools, tool_choice) + + @abc.abstractmethod + def _stream_implementation( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, ) -> Iterator[BaseMessage]: raise NotImplementedError diff --git a/backend/danswer/llm/llm_initialization.py b/backend/danswer/llm/llm_initialization.py index 5c6f8bdbe47..fef17ca812d 100644 --- a/backend/danswer/llm/llm_initialization.py +++ b/backend/danswer/llm/llm_initialization.py @@ -70,9 +70,11 @@ def load_llm_providers(db_session: Session) -> None: 
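# Aside: the interfaces.py hunk above turns invoke()/stream() into template methods — the base
# class runs a shared _precall guard (generative-AI-disabled check, optional prompt logging) and
# then delegates to the abstract _invoke_implementation/_stream_implementation. A stripped-down
# sketch of that shape; the flag and class names here are placeholders, not the real config.
import abc


class BaseLLM(abc.ABC):
    def __init__(self, generative_ai_disabled: bool = False) -> None:
        self._disabled = generative_ai_disabled

    def _precall(self, prompt: str) -> None:
        # Shared guard every concrete LLM inherits without re-implementing it.
        if self._disabled:
            raise RuntimeError("Generative AI is disabled")

    def invoke(self, prompt: str) -> str:
        self._precall(prompt)
        return self._invoke_implementation(prompt)

    @abc.abstractmethod
    def _invoke_implementation(self, prompt: str) -> str:
        ...


class EchoLLM(BaseLLM):
    def _invoke_implementation(self, prompt: str) -> str:
        return f"echo: {prompt}"


# EchoLLM().invoke("hi") -> "echo: hi"
# EchoLLM(generative_ai_disabled=True).invoke("hi") raises RuntimeError before any model call.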
FAST_GEN_AI_MODEL_VERSION or well_known_provider.default_fast_model ), model_names=model_names, + is_public=True, + display_model_names=[], ) llm_provider = upsert_llm_provider(db_session, llm_provider_request) update_default_provider(db_session, llm_provider.id) - logger.info( + logger.notice( f"Migrated LLM provider from env variables for provider '{GEN_AI_MODEL_PROVIDER}'" ) diff --git a/backend/danswer/llm/llm_provider_options.py b/backend/danswer/llm/llm_provider_options.py index bb4bd13dd79..24feeb2f27c 100644 --- a/backend/danswer/llm/llm_provider_options.py +++ b/backend/danswer/llm/llm_provider_options.py @@ -26,13 +26,13 @@ class WellKnownLLMProviderDescriptor(BaseModel): OPEN_AI_MODEL_NAMES = [ "gpt-4", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-preview", "gpt-4-1106-preview", "gpt-4-vision-preview", - # "gpt-4-32k", # not EOL but still doesnt work "gpt-4-0613", - # "gpt-4-32k-0613", # not EOL but still doesnt work + "gpt-4o-2024-08-06", "gpt-4-0314", "gpt-4-32k-0314", "gpt-3.5-turbo", @@ -47,9 +47,11 @@ class WellKnownLLMProviderDescriptor(BaseModel): BEDROCK_PROVIDER_NAME = "bedrock" # need to remove all the weird "bedrock/eu-central-1/anthropic.claude-v1" named # models -BEDROCK_MODEL_NAMES = [model for model in litellm.bedrock_models if "/" not in model][ - ::-1 -] +BEDROCK_MODEL_NAMES = [ + model + for model in litellm.bedrock_models + if "/" not in model and "embed" not in model +][::-1] IGNORABLE_ANTHROPIC_MODELS = [ "claude-2", @@ -83,7 +85,7 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: custom_config_keys=[], llm_names=fetch_models_for_provider(OPENAI_PROVIDER_NAME), default_model="gpt-4", - default_fast_model="gpt-3.5-turbo", + default_fast_model="gpt-4o-mini", ), WellKnownLLMProviderDescriptor( name=ANTHROPIC_PROVIDER_NAME, diff --git a/backend/danswer/llm/override_models.py b/backend/danswer/llm/override_models.py index 1ecb3192f0a..08e4258916a 100644 --- a/backend/danswer/llm/override_models.py +++ b/backend/danswer/llm/override_models.py @@ -11,6 +11,9 @@ class LLMOverride(BaseModel): model_version: str | None = None temperature: float | None = None + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} + class PromptOverride(BaseModel): system_prompt: str | None = None diff --git a/backend/danswer/llm/utils.py b/backend/danswer/llm/utils.py index a526adddc79..82617f3f05b 100644 --- a/backend/danswer/llm/utils.py +++ b/backend/danswer/llm/utils.py @@ -1,6 +1,6 @@ +import json from collections.abc import Callable from collections.abc import Iterator -from copy import copy from typing import Any from typing import cast from typing import TYPE_CHECKING @@ -16,19 +16,29 @@ from langchain.schema.messages import BaseMessage from langchain.schema.messages import HumanMessage from langchain.schema.messages import SystemMessage -from tiktoken.core import Encoding +from litellm.exceptions import APIConnectionError # type: ignore +from litellm.exceptions import APIError # type: ignore +from litellm.exceptions import AuthenticationError # type: ignore +from litellm.exceptions import BadRequestError # type: ignore +from litellm.exceptions import BudgetExceededError # type: ignore +from litellm.exceptions import ContentPolicyViolationError # type: ignore +from litellm.exceptions import ContextWindowExceededError # type: ignore +from litellm.exceptions import NotFoundError # type: ignore +from litellm.exceptions import PermissionDeniedError # type: ignore +from 
litellm.exceptions import RateLimitError # type: ignore +from litellm.exceptions import Timeout # type: ignore +from litellm.exceptions import UnprocessableEntityError # type: ignore from danswer.configs.constants import MessageType -from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS from danswer.configs.model_configs import GEN_AI_MAX_TOKENS +from danswer.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER +from danswer.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS from danswer.db.models import ChatMessage from danswer.file_store.models import ChatFileType from danswer.file_store.models import InMemoryChatFile from danswer.llm.interfaces import LLM from danswer.prompts.constants import CODE_BLOCK_PAT -from danswer.search.models import InferenceChunk from danswer.utils.logger import setup_logger from shared_configs.configs import LOG_LEVEL @@ -37,60 +47,69 @@ logger = setup_logger() -_LLM_TOKENIZER: Any = None -_LLM_TOKENIZER_ENCODE: Callable[[str], Any] | None = None +def litellm_exception_to_error_msg(e: Exception, llm: LLM) -> str: + error_msg = str(e) -def get_default_llm_tokenizer() -> Encoding: - """Currently only supports the OpenAI default tokenizer: tiktoken""" - global _LLM_TOKENIZER - if _LLM_TOKENIZER is None: - _LLM_TOKENIZER = tiktoken.get_encoding("cl100k_base") - return _LLM_TOKENIZER - - -def get_default_llm_token_encode() -> Callable[[str], Any]: - global _LLM_TOKENIZER_ENCODE - if _LLM_TOKENIZER_ENCODE is None: - tokenizer = get_default_llm_tokenizer() - if isinstance(tokenizer, Encoding): - return tokenizer.encode # type: ignore - - # Currently only supports OpenAI encoder - raise ValueError("Invalid Encoder selected") - - return _LLM_TOKENIZER_ENCODE - - -def tokenizer_trim_content( - content: str, desired_length: int, tokenizer: Encoding -) -> str: - tokens = tokenizer.encode(content) - if len(tokens) > desired_length: - content = tokenizer.decode(tokens[:desired_length]) - return content - - -def tokenizer_trim_chunks( - chunks: list[InferenceChunk], max_chunk_toks: int = DOC_EMBEDDING_CONTEXT_SIZE -) -> list[InferenceChunk]: - tokenizer = get_default_llm_tokenizer() - new_chunks = copy(chunks) - for ind, chunk in enumerate(new_chunks): - new_content = tokenizer_trim_content(chunk.content, max_chunk_toks, tokenizer) - if len(new_content) != len(chunk.content): - new_chunk = copy(chunk) - new_chunk.content = new_content - new_chunks[ind] = new_chunk - return new_chunks + if isinstance(e, BadRequestError): + error_msg = "Bad request: The server couldn't process your request. Please check your input." + elif isinstance(e, AuthenticationError): + error_msg = "Authentication failed: Please check your API key and credentials." + elif isinstance(e, PermissionDeniedError): + error_msg = ( + "Permission denied: You don't have the necessary permissions for this operation." + "Ensure you have access to this model." + ) + elif isinstance(e, NotFoundError): + error_msg = "Resource not found: The requested resource doesn't exist." + elif isinstance(e, UnprocessableEntityError): + error_msg = "Unprocessable entity: The server couldn't process your request due to semantic errors." + elif isinstance(e, RateLimitError): + error_msg = ( + "Rate limit exceeded: Please slow down your requests and try again later." 
+ ) + elif isinstance(e, ContextWindowExceededError): + error_msg = ( + "Context window exceeded: Your input is too long for the model to process." + ) + if llm is not None: + try: + max_context = get_max_input_tokens( + model_name=llm.config.model_name, + model_provider=llm.config.model_provider, + ) + error_msg += f"Your invoked model ({llm.config.model_name}) has a maximum context size of {max_context}" + except Exception: + logger.warning( + "Unable to get maximum input token for LiteLLM excpetion handling" + ) + elif isinstance(e, ContentPolicyViolationError): + error_msg = "Content policy violation: Your request violates the content policy. Please revise your input." + elif isinstance(e, APIConnectionError): + error_msg = "API connection error: Failed to connect to the API. Please check your internet connection." + elif isinstance(e, BudgetExceededError): + error_msg = ( + "Budget exceeded: You've exceeded your allocated budget for API usage." + ) + elif isinstance(e, Timeout): + error_msg = "Request timed out: The operation took too long to complete. Please try again." + elif isinstance(e, APIError): + error_msg = f"API error: An error occurred while communicating with the API. Details: {str(e)}" + else: + error_msg = "An unexpected error occurred while processing your request. Please try again later." + return error_msg def translate_danswer_msg_to_langchain( msg: Union[ChatMessage, "PreviousMessage"], ) -> BaseMessage: + files: list[InMemoryChatFile] = [] + # If the message is a `ChatMessage`, it doesn't have the downloaded files - # attached. Just ignore them for now - files = [] if isinstance(msg, ChatMessage) else msg.files + # attached. Just ignore them for now. Also, OpenAI doesn't allow files to + # be attached to AI messages, so we must remove them + if not isinstance(msg, ChatMessage) and msg.message_type != MessageType.ASSISTANT: + files = msg.files content = build_content_with_imgs(msg.message, files) if msg.message_type == MessageType.SYSTEM: @@ -271,6 +290,13 @@ def check_message_tokens( elif part["type"] == "image_url": total_tokens += _IMG_TOKENS + if isinstance(message, AIMessage) and message.tool_calls: + for tool_call in message.tool_calls: + total_tokens += check_number_of_tokens( + json.dumps(tool_call["args"]), encode_fn + ) + total_tokens += check_number_of_tokens(tool_call["name"], encode_fn) + return total_tokens @@ -310,31 +336,80 @@ def get_llm_max_tokens( """Best effort attempt to get the max tokens for the LLM""" if GEN_AI_MAX_TOKENS: # This is an override, so always return this + logger.info(f"Using override GEN_AI_MAX_TOKENS: {GEN_AI_MAX_TOKENS}") return GEN_AI_MAX_TOKENS try: model_obj = model_map.get(f"{model_provider}/{model_name}") if not model_obj: model_obj = model_map[model_name] + logger.debug(f"Using model object for {model_name}") + else: + logger.debug(f"Using model object for {model_provider}/{model_name}") if "max_input_tokens" in model_obj: - return model_obj["max_input_tokens"] + max_tokens = model_obj["max_input_tokens"] + logger.info( + f"Max tokens for {model_name}: {max_tokens} (from max_input_tokens)" + ) + return max_tokens if "max_tokens" in model_obj: - return model_obj["max_tokens"] + max_tokens = model_obj["max_tokens"] + logger.info(f"Max tokens for {model_name}: {max_tokens} (from max_tokens)") + return max_tokens + logger.error(f"No max tokens found for LLM: {model_name}") raise RuntimeError("No max tokens found for LLM") except Exception: logger.exception( - f"Failed to get max tokens for LLM with name {model_name}. 
Defaulting to 4096." + f"Failed to get max tokens for LLM with name {model_name}. Defaulting to {GEN_AI_MODEL_FALLBACK_MAX_TOKENS}." + ) + return GEN_AI_MODEL_FALLBACK_MAX_TOKENS + + +def get_llm_max_output_tokens( + model_map: dict, + model_name: str, + model_provider: str = GEN_AI_MODEL_PROVIDER, +) -> int: + """Best effort attempt to get the max output tokens for the LLM""" + try: + model_obj = model_map.get(f"{model_provider}/{model_name}") + if not model_obj: + model_obj = model_map[model_name] + logger.debug(f"Using model object for {model_name}") + else: + logger.debug(f"Using model object for {model_provider}/{model_name}") + + if "max_output_tokens" in model_obj: + max_output_tokens = model_obj["max_output_tokens"] + logger.info(f"Max output tokens for {model_name}: {max_output_tokens}") + return max_output_tokens + + # Fallback to a fraction of max_tokens if max_output_tokens is not specified + if "max_tokens" in model_obj: + max_output_tokens = int(model_obj["max_tokens"] * 0.1) + logger.info( + f"Fallback max output tokens for {model_name}: {max_output_tokens} (10% of max_tokens)" + ) + return max_output_tokens + + logger.error(f"No max output tokens found for LLM: {model_name}") + raise RuntimeError("No max output tokens found for LLM") + except Exception: + default_output_tokens = int(GEN_AI_MODEL_FALLBACK_MAX_TOKENS) + logger.exception( + f"Failed to get max output tokens for LLM with name {model_name}. " + f"Defaulting to {default_output_tokens} (fallback max tokens)." ) - return 4096 + return default_output_tokens def get_max_input_tokens( model_name: str, model_provider: str, - output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, + output_tokens: int = GEN_AI_NUM_RESERVED_OUTPUT_TOKENS, ) -> int: # NOTE: we previously used `litellm.get_max_tokens()`, but despite the name, this actually # returns the max OUTPUT tokens. 
Under the hood, this uses the `litellm.model_cost` dict, diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 88064a2e8df..6652e5d3c39 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -32,34 +32,53 @@ from danswer.configs.app_configs import OAUTH_CLIENT_SECRET from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import WEB_DOMAIN -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.constants import AuthType +from danswer.configs.constants import KV_REINDEX_KEY +from danswer.configs.constants import KV_SEARCH_SETTINGS +from danswer.configs.constants import POSTGRES_WEB_APP_NAME +from danswer.db.connector import check_connectors_exist from danswer.db.connector import create_initial_default_connector from danswer.db.connector_credential_pair import associate_default_cc_pair from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.connector_credential_pair import resync_cc_pair from danswer.db.credentials import create_initial_public_credential -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model +from danswer.db.document import check_docs_exist from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import init_sqlalchemy_engine from danswer.db.engine import warm_up_connections from danswer.db.index_attempt import cancel_indexing_attempts_past_model from danswer.db.index_attempt import expire_index_attempts from danswer.db.persona import delete_old_default_personas +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.search_settings import update_current_search_settings +from danswer.db.search_settings import update_secondary_search_settings from danswer.db.standard_answer import create_initial_default_standard_answer_category from danswer.db.swap_index import check_index_swap from danswer.document_index.factory import get_default_document_index +from danswer.document_index.interfaces import DocumentIndex +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.indexing.models import IndexingSetting from danswer.llm.llm_initialization import load_llm_providers +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder +from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder +from danswer.search.models import SavedSearchSettings from danswer.search.retrieval.search_runner import download_nltk_data -from danswer.search.search_nlp_models import warm_up_encoders from danswer.server.auth_check import check_router_auth from danswer.server.danswer_api.ingestion import router as danswer_api_router from danswer.server.documents.cc_pair import router as cc_pair_router from danswer.server.documents.connector import router as connector_router from danswer.server.documents.credential import router as credential_router from danswer.server.documents.document import router as document_router +from danswer.server.documents.indexing import router as indexing_router from danswer.server.features.document_set.api import router as document_set_router from danswer.server.features.folder.api import router as folder_router +from 
danswer.server.features.input_prompt.api import ( + admin_router as admin_input_prompt_router, +) +from danswer.server.features.input_prompt.api import basic_router as input_prompt_router from danswer.server.features.persona.api import admin_router as admin_persona_router from danswer.server.features.persona.api import basic_router as persona_router from danswer.server.features.prompt.api import basic_router as prompt_router @@ -67,10 +86,12 @@ from danswer.server.features.tool.api import router as tool_router from danswer.server.gpts.api import router as gpts_router from danswer.server.manage.administrative import router as admin_router +from danswer.server.manage.embedding.api import admin_router as embedding_admin_router +from danswer.server.manage.embedding.api import basic_router as embedding_router from danswer.server.manage.get_state import router as state_router from danswer.server.manage.llm.api import admin_router as llm_admin_router from danswer.server.manage.llm.api import basic_router as llm_router -from danswer.server.manage.secondary_index import router as secondary_index_router +from danswer.server.manage.search_settings import router as search_settings_router from danswer.server.manage.slack_bot import router as slack_bot_management_router from danswer.server.manage.standard_answer import router as standard_answer_router from danswer.server.manage.users import router as user_router @@ -94,7 +115,6 @@ from danswer.utils.variable_functionality import fetch_versioned_implementation from danswer.utils.variable_functionality import global_version from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable -from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT @@ -150,8 +170,116 @@ def include_router_with_global_prefix_prepended( application.include_router(router, **final_kwargs) +def setup_postgres(db_session: Session) -> None: + logger.notice("Verifying default connector/credential exist.") + create_initial_public_credential(db_session) + create_initial_default_connector(db_session) + associate_default_cc_pair(db_session) + + logger.notice("Verifying default standard answer category exists.") + create_initial_default_standard_answer_category(db_session) + + logger.notice("Loading LLM providers from env variables") + load_llm_providers(db_session) + + logger.notice("Loading default Prompts and Personas") + delete_old_default_personas(db_session) + load_chat_yamls() + + logger.notice("Loading built-in tools") + load_builtin_tools(db_session) + refresh_built_in_tools_cache(db_session) + auto_add_search_tool_to_personas(db_session) + + +def translate_saved_search_settings(db_session: Session) -> None: + kv_store = get_dynamic_config_store() + + try: + search_settings_dict = kv_store.load(KV_SEARCH_SETTINGS) + if isinstance(search_settings_dict, dict): + # Update current search settings + current_settings = get_current_search_settings(db_session) + + # Update non-preserved fields + if current_settings: + current_settings_dict = SavedSearchSettings.from_db_model( + current_settings + ).dict() + + new_current_settings = SavedSearchSettings( + **{**current_settings_dict, **search_settings_dict} + ) + update_current_search_settings(db_session, new_current_settings) + + # Update secondary search settings + secondary_settings = get_secondary_search_settings(db_session) + if secondary_settings: + secondary_settings_dict = 
SavedSearchSettings.from_db_model( + secondary_settings + ).dict() + + new_secondary_settings = SavedSearchSettings( + **{**secondary_settings_dict, **search_settings_dict} + ) + update_secondary_search_settings( + db_session, + new_secondary_settings, + ) + # Delete the KV store entry after successful update + kv_store.delete(KV_SEARCH_SETTINGS) + logger.notice("Search settings updated and KV store entry deleted.") + else: + logger.notice("KV store search settings is empty.") + except ConfigNotFoundError: + logger.notice("No search config found in KV store.") + + +def mark_reindex_flag(db_session: Session) -> None: + kv_store = get_dynamic_config_store() + try: + value = kv_store.load(KV_REINDEX_KEY) + logger.debug(f"Re-indexing flag has value {value}") + return + except ConfigNotFoundError: + # Only need to update the flag if it hasn't been set + pass + + # If their first deployment is after the changes, it will + # enable this when the other changes go in, need to avoid + # this being set to False, then the user indexes things on the old version + docs_exist = check_docs_exist(db_session) + connectors_exist = check_connectors_exist(db_session) + if docs_exist or connectors_exist: + kv_store.store(KV_REINDEX_KEY, True) + else: + kv_store.store(KV_REINDEX_KEY, False) + + +def setup_vespa( + document_index: DocumentIndex, + index_setting: IndexingSetting, + secondary_index_setting: IndexingSetting | None, +) -> None: + # Vespa startup is a bit slow, so give it a few seconds + wait_time = 5 + for _ in range(5): + try: + document_index.ensure_indices_exist( + index_embedding_dim=index_setting.model_dim, + secondary_index_embedding_dim=secondary_index_setting.model_dim + if secondary_index_setting + else None, + ) + break + except Exception: + logger.notice(f"Waiting on Vespa, retrying in {wait_time} seconds...") + time.sleep(wait_time) + + @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator: + init_sqlalchemy_engine(POSTGRES_WEB_APP_NAME) engine = get_sqlalchemy_engine() verify_auth = fetch_versioned_implementation( @@ -161,28 +289,23 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: verify_auth() if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET: - logger.info("Both OAuth Client ID and Secret are configured.") + logger.notice("Both OAuth Client ID and Secret are configured.") if DISABLE_GENERATIVE_AI: - logger.info("Generative AI Q&A disabled") - - if MULTILINGUAL_QUERY_EXPANSION: - logger.info( - f"Using multilingual flow with languages: {MULTILINGUAL_QUERY_EXPANSION}" - ) + logger.notice("Generative AI Q&A disabled") # fill up Postgres connection pools await warm_up_connections() with Session(engine) as db_session: check_index_swap(db_session=db_session) - db_embedding_model = get_current_db_embedding_model(db_session) - secondary_db_embedding_model = get_secondary_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) + secondary_search_settings = get_secondary_search_settings(db_session) # Break bad state for thrashing indexes - if secondary_db_embedding_model and DISABLE_INDEX_UPDATE_ON_SWAP: + if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP: expire_index_attempts( - embedding_model_id=db_embedding_model.id, db_session=db_session + search_settings_id=search_settings.id, db_session=db_session ) for cc_pair in get_connector_credential_pairs(db_session): @@ -191,72 +314,62 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # Expire all old embedding models indexing attempts, technically redundant 
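# Aside: setup_vespa above retries ensure_indices_exist a fixed number of times with a constant
# sleep because the index can be slow to come up. A generic sketch of that bounded wait-and-retry;
# unlike the patch (which simply stops retrying), this version returns a success flag. The probe
# callable is supplied by the caller and stands in for the Vespa call.
import time
from collections.abc import Callable


def wait_for_service(
    probe: Callable[[], None],
    attempts: int = 5,
    wait_seconds: float = 5.0,
) -> bool:
    # Call `probe` until it stops raising, up to `attempts` times.
    for attempt in range(1, attempts + 1):
        try:
            probe()
            return True
        except Exception:
            if attempt == attempts:
                return False
            # Service not ready yet; back off with a constant delay, as the patch does.
            time.sleep(wait_seconds)
    return False


# Example: wait_for_service(lambda: document_index.ensure_indices_exist(...), attempts=5)
# where `document_index` is whatever index client the caller already holds.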
cancel_indexing_attempts_past_model(db_session) - logger.info(f'Using Embedding model: "{db_embedding_model.model_name}"') - if db_embedding_model.query_prefix or db_embedding_model.passage_prefix: - logger.info(f'Query embedding prefix: "{db_embedding_model.query_prefix}"') - logger.info( - f'Passage embedding prefix: "{db_embedding_model.passage_prefix}"' + logger.notice(f'Using Embedding model: "{search_settings.model_name}"') + if search_settings.query_prefix or search_settings.passage_prefix: + logger.notice(f'Query embedding prefix: "{search_settings.query_prefix}"') + logger.notice( + f'Passage embedding prefix: "{search_settings.passage_prefix}"' ) - if ENABLE_RERANKING_REAL_TIME_FLOW: - logger.info("Reranking step of search flow is enabled.") + if search_settings: + if not search_settings.disable_rerank_for_streaming: + logger.notice("Reranking is enabled.") - logger.info("Verifying query preprocessing (NLTK) data is downloaded") - download_nltk_data() + if search_settings.multilingual_expansion: + logger.notice( + f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}." + ) - logger.info("Verifying default connector/credential exist.") - create_initial_public_credential(db_session) - create_initial_default_connector(db_session) - associate_default_cc_pair(db_session) + if search_settings.rerank_model_name and not search_settings.provider_type: + warm_up_cross_encoder(search_settings.rerank_model_name) - logger.info("Verifying default standard answer category exists.") - create_initial_default_standard_answer_category(db_session) + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") + download_nltk_data() - logger.info("Loading LLM providers from env variables") - load_llm_providers(db_session) + # setup Postgres with default credential, llm providers, etc. 
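# Aside: mark_reindex_flag above writes the re-index flag only when the key is missing from the
# dynamic config (KV) store, so an existing deployment with documents gets flagged exactly once.
# A toy sketch of that "decide the flag on first boot, never overwrite it" pattern; the in-memory
# store, the key name, and KeyError (in place of the real ConfigNotFoundError) are illustrative.
class InMemoryKVStore:
    def __init__(self) -> None:
        self._data: dict[str, object] = {}

    def load(self, key: str) -> object:
        return self._data[key]  # raises KeyError if missing

    def store(self, key: str, value: object) -> None:
        self._data[key] = value


def mark_reindex_flag(kv_store: InMemoryKVStore, existing_content: bool) -> None:
    try:
        kv_store.load("needs_reindexing")
        return  # Flag already decided on a previous startup; keep it as-is.
    except KeyError:
        pass
    # Pre-existing docs/connectors mean the old index format is in use, so re-indexing is needed.
    kv_store.store("needs_reindexing", existing_content)


# kv = InMemoryKVStore(); mark_reindex_flag(kv, existing_content=True)
# kv.load("needs_reindexing") -> True, and later calls never flip it back.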
+ setup_postgres(db_session) - logger.info("Loading default Prompts and Personas") - delete_old_default_personas(db_session) - load_chat_yamls() + translate_saved_search_settings(db_session) - logger.info("Loading built-in tools") - load_builtin_tools(db_session) - refresh_built_in_tools_cache(db_session) - auto_add_search_tool_to_personas(db_session) + # Does the user need to trigger a reindexing to bring the document index + # into a good state, marked in the kv store + mark_reindex_flag(db_session) - logger.info("Verifying Document Index(s) is/are available.") + # ensure Vespa is setup correctly + logger.notice("Verifying Document Index(s) is/are available.") document_index = get_default_document_index( - primary_index_name=db_embedding_model.index_name, - secondary_index_name=( - secondary_db_embedding_model.index_name - if secondary_db_embedding_model - else None - ), + primary_index_name=search_settings.index_name, + secondary_index_name=secondary_search_settings.index_name + if secondary_search_settings + else None, ) - # Vespa startup is a bit slow, so give it a few seconds - wait_time = 5 - for attempt in range(5): - try: - document_index.ensure_indices_exist( - index_embedding_dim=db_embedding_model.model_dim, - secondary_index_embedding_dim=( - secondary_db_embedding_model.model_dim - if secondary_db_embedding_model - else None - ), - ) - break - except Exception: - logger.info(f"Waiting on Vespa, retrying in {wait_time} seconds...") - time.sleep(wait_time) - - logger.info(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") - warm_up_encoders( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - model_server_host=MODEL_SERVER_HOST, - model_server_port=MODEL_SERVER_PORT, - ) + setup_vespa( + document_index, + IndexingSetting.from_db_model(search_settings), + IndexingSetting.from_db_model(secondary_search_settings) + if secondary_search_settings + else None, + ) + + logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") + if search_settings.provider_type is None: + warm_up_bi_encoder( + embedding_model=EmbeddingModel.from_db_model( + search_settings=search_settings, + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ), + ) optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__}) yield @@ -278,13 +391,15 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, cc_pair_router) include_router_with_global_prefix_prepended(application, folder_router) include_router_with_global_prefix_prepended(application, document_set_router) - include_router_with_global_prefix_prepended(application, secondary_index_router) + include_router_with_global_prefix_prepended(application, search_settings_router) include_router_with_global_prefix_prepended( application, slack_bot_management_router ) include_router_with_global_prefix_prepended(application, standard_answer_router) include_router_with_global_prefix_prepended(application, persona_router) include_router_with_global_prefix_prepended(application, admin_persona_router) + include_router_with_global_prefix_prepended(application, input_prompt_router) + include_router_with_global_prefix_prepended(application, admin_input_prompt_router) include_router_with_global_prefix_prepended(application, prompt_router) include_router_with_global_prefix_prepended(application, tool_router) include_router_with_global_prefix_prepended(application, admin_tool_router) @@ -295,9 +410,12 @@ def get_application() -> FastAPI: 
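# Aside: the lifespan function above runs all startup work (Postgres setup, settings migration,
# Vespa checks, model warm-up) before yielding control to the app. A bare-bones sketch of that
# FastAPI lifespan shape; the startup/shutdown bodies are placeholders, not the patch's steps.
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager

from fastapi import FastAPI


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator:
    # Everything before `yield` runs once at startup.
    yield
    # Anything after `yield` would run once at shutdown.


def get_application() -> FastAPI:
    # FastAPI wires the async context manager in via its `lifespan` argument.
    return FastAPI(lifespan=lifespan)


app = get_application()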
include_router_with_global_prefix_prepended(application, settings_admin_router) include_router_with_global_prefix_prepended(application, llm_admin_router) include_router_with_global_prefix_prepended(application, llm_router) + include_router_with_global_prefix_prepended(application, embedding_admin_router) + include_router_with_global_prefix_prepended(application, embedding_router) include_router_with_global_prefix_prepended( application, token_rate_limit_settings_router ) + include_router_with_global_prefix_prepended(application, indexing_router) if AUTH_TYPE == AuthType.DISABLED: # Server logs this during auth setup verification step @@ -388,11 +506,11 @@ def get_application() -> FastAPI: if __name__ == "__main__": - logger.info( + logger.notice( f"Starting Danswer Backend version {__version__} on http://{APP_HOST}:{str(APP_PORT)}/" ) if global_version.get_is_ee_version(): - logger.info("Running Enterprise Edition") + logger.notice("Running Enterprise Edition") uvicorn.run(app, host=APP_HOST, port=APP_PORT) diff --git a/backend/danswer/natural_language_processing/__init__.py b/backend/danswer/natural_language_processing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py new file mode 100644 index 00000000000..b7835c4e906 --- /dev/null +++ b/backend/danswer/natural_language_processing/search_nlp_models.py @@ -0,0 +1,385 @@ +import re +import threading +import time +from collections.abc import Callable +from functools import wraps +from typing import Any + +import requests +from httpx import HTTPError +from retry import retry + +from danswer.configs.app_configs import LARGE_CHUNK_RATIO +from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS +from danswer.configs.model_configs import ( + BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES, +) +from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE +from danswer.db.models import SearchSettings +from danswer.natural_language_processing.utils import get_tokenizer +from danswer.natural_language_processing.utils import tokenizer_trim_content +from danswer.utils.logger import setup_logger +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import EmbedTextType +from shared_configs.enums import RerankerProvider +from shared_configs.model_server_models import Embedding +from shared_configs.model_server_models import EmbedRequest +from shared_configs.model_server_models import EmbedResponse +from shared_configs.model_server_models import IntentRequest +from shared_configs.model_server_models import IntentResponse +from shared_configs.model_server_models import RerankRequest +from shared_configs.model_server_models import RerankResponse +from shared_configs.utils import batch_list + +logger = setup_logger() + + +WARM_UP_STRINGS = [ + "Danswer is amazing!", + "Check out our easy deployment guide at", + "https://docs.danswer.dev/quickstart", +] + + +def clean_model_name(model_str: str) -> str: + return model_str.replace("/", "_").replace("-", "_").replace(".", "_") + + +_WHITELIST = set( + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n\t" +) +_INITIAL_FILTER = re.compile( + "[" + "\U00000080-\U0000FFFF" # All Unicode characters beyond ASCII + "\U00010000-\U0010FFFF" # All Unicode 
characters in supplementary planes + "]+", + flags=re.UNICODE, +) + + +def clean_openai_text(text: str) -> str: + # First, remove all weird characters + cleaned = _INITIAL_FILTER.sub("", text) + # Then, keep only whitelisted characters + return "".join(char for char in cleaned if char in _WHITELIST) + + +def build_model_server_url( + model_server_host: str, + model_server_port: int, +) -> str: + model_server_url = f"{model_server_host}:{model_server_port}" + + # use protocol if provided + if "http" in model_server_url: + return model_server_url + + # otherwise default to http + return f"http://{model_server_url}" + + +class EmbeddingModel: + def __init__( + self, + server_host: str, # Changes depending on indexing or inference + server_port: int, + model_name: str | None, + normalize: bool, + query_prefix: str | None, + passage_prefix: str | None, + api_key: str | None, + provider_type: EmbeddingProvider | None, + retrim_content: bool = False, + ) -> None: + self.api_key = api_key + self.provider_type = provider_type + self.query_prefix = query_prefix + self.passage_prefix = passage_prefix + self.normalize = normalize + self.model_name = model_name + self.retrim_content = retrim_content + self.tokenizer = get_tokenizer( + model_name=model_name, provider_type=provider_type + ) + + model_server_url = build_model_server_url(server_host, server_port) + self.embed_server_endpoint = f"{model_server_url}/encoder/bi-encoder-embed" + + def _make_model_server_request(self, embed_request: EmbedRequest) -> EmbedResponse: + def _make_request() -> EmbedResponse: + response = requests.post( + self.embed_server_endpoint, json=embed_request.model_dump() + ) + try: + response.raise_for_status() + except requests.HTTPError as e: + try: + error_detail = response.json().get("detail", str(e)) + except Exception: + error_detail = response.text + raise HTTPError(f"HTTP error occurred: {error_detail}") from e + except requests.RequestException as e: + raise HTTPError(f"Request failed: {str(e)}") from e + + return EmbedResponse(**response.json()) + + # only perform retries for the non-realtime embedding of passages (e.g. 
for indexing) + if embed_request.text_type == EmbedTextType.PASSAGE: + return retry(tries=3, delay=5)(_make_request)() + else: + return _make_request() + + def _batch_encode_texts( + self, + texts: list[str], + text_type: EmbedTextType, + batch_size: int, + max_seq_length: int, + ) -> list[Embedding]: + text_batches = batch_list(texts, batch_size) + + logger.debug( + f"Encoding {len(texts)} texts in {len(text_batches)} batches for local model" + ) + + embeddings: list[Embedding] = [] + for idx, text_batch in enumerate(text_batches, start=1): + logger.debug(f"Encoding batch {idx} of {len(text_batches)}") + embed_request = EmbedRequest( + model_name=self.model_name, + texts=text_batch, + max_context_length=max_seq_length, + normalize_embeddings=self.normalize, + api_key=self.api_key, + provider_type=self.provider_type, + text_type=text_type, + manual_query_prefix=self.query_prefix, + manual_passage_prefix=self.passage_prefix, + ) + + response = self._make_model_server_request(embed_request) + embeddings.extend(response.embeddings) + return embeddings + + def encode( + self, + texts: list[str], + text_type: EmbedTextType, + large_chunks_present: bool = False, + local_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS, + api_embedding_batch_size: int = BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES, + max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE, + ) -> list[Embedding]: + if not texts or not all(texts): + raise ValueError(f"Empty or missing text for embedding: {texts}") + + if large_chunks_present: + max_seq_length *= LARGE_CHUNK_RATIO + + if self.retrim_content: + # This is applied during indexing as a catchall for overly long titles (or other uncapped fields) + # Note that this uses just the default tokenizer which may also lead to very minor miscountings + # However this slight miscounting is very unlikely to have any material impact. 
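# Aside: the retrim_content handling below cuts each text down to max_seq_length with the model's
# tokenizer before embedding, the same encode / slice / decode idea as the tokenizer_trim_content
# helper this patch moves into natural_language_processing.utils. A standalone sketch using
# tiktoken purely as an example tokenizer; the function name is illustrative.
import tiktoken


def trim_to_token_budget(content: str, desired_length: int) -> str:
    # Return `content` cut down to at most `desired_length` tokens.
    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = tokenizer.encode(content)
    if len(tokens) <= desired_length:
        return content
    # Decode only the first `desired_length` tokens; the tail is dropped.
    return tokenizer.decode(tokens[:desired_length])


# Example: trim_to_token_budget("some very long passage ...", desired_length=512)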
+ texts = [ + tokenizer_trim_content( + content=text, + desired_length=max_seq_length, + tokenizer=self.tokenizer, + ) + for text in texts + ] + + if self.provider_type == EmbeddingProvider.OPENAI: + # If the provider is openai, we need to clean the text + # as a temporary workaround for the openai API + texts = [clean_openai_text(text) for text in texts] + + batch_size = ( + api_embedding_batch_size + if self.provider_type + else local_embedding_batch_size + ) + + return self._batch_encode_texts( + texts=texts, + text_type=text_type, + batch_size=batch_size, + max_seq_length=max_seq_length, + ) + + @classmethod + def from_db_model( + cls, + search_settings: SearchSettings, + server_host: str, # Changes depending on indexing or inference + server_port: int, + retrim_content: bool = False, + ) -> "EmbeddingModel": + return cls( + server_host=server_host, + server_port=server_port, + model_name=search_settings.model_name, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + api_key=search_settings.api_key, + provider_type=search_settings.provider_type, + retrim_content=retrim_content, + ) + + +class RerankingModel: + def __init__( + self, + model_name: str, + provider_type: RerankerProvider | None, + api_key: str | None, + model_server_host: str = MODEL_SERVER_HOST, + model_server_port: int = MODEL_SERVER_PORT, + ) -> None: + model_server_url = build_model_server_url(model_server_host, model_server_port) + self.rerank_server_endpoint = model_server_url + "/encoder/cross-encoder-scores" + self.model_name = model_name + self.provider_type = provider_type + self.api_key = api_key + + def predict(self, query: str, passages: list[str]) -> list[float]: + rerank_request = RerankRequest( + query=query, + documents=passages, + model_name=self.model_name, + provider_type=self.provider_type, + api_key=self.api_key, + ) + + response = requests.post( + self.rerank_server_endpoint, json=rerank_request.model_dump() + ) + response.raise_for_status() + + return RerankResponse(**response.json()).scores + + +class QueryAnalysisModel: + def __init__( + self, + model_server_host: str = MODEL_SERVER_HOST, + model_server_port: int = MODEL_SERVER_PORT, + # Lean heavily towards not throwing out keywords + keyword_percent_threshold: float = 0.1, + # Lean towards semantic which is the default + semantic_percent_threshold: float = 0.4, + ) -> None: + model_server_url = build_model_server_url(model_server_host, model_server_port) + self.intent_server_endpoint = model_server_url + "/custom/query-analysis" + self.keyword_percent_threshold = keyword_percent_threshold + self.semantic_percent_threshold = semantic_percent_threshold + + def predict( + self, + query: str, + ) -> tuple[bool, list[str]]: + intent_request = IntentRequest( + query=query, + keyword_percent_threshold=self.keyword_percent_threshold, + semantic_percent_threshold=self.semantic_percent_threshold, + ) + + response = requests.post( + self.intent_server_endpoint, json=intent_request.model_dump() + ) + response.raise_for_status() + + response_model = IntentResponse(**response.json()) + + return response_model.is_keyword, response_model.keywords + + +def warm_up_retry( + func: Callable[..., Any], + tries: int = 20, + delay: int = 5, + *args: Any, + **kwargs: Any, +) -> Callable[..., Any]: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + exceptions = [] + for attempt in range(tries): + try: + return func(*args, **kwargs) + except Exception as e: + 
exceptions.append(e) + logger.exception( + f"Attempt {attempt + 1} failed; retrying in {delay} seconds..." + ) + time.sleep(delay) + raise Exception(f"All retries failed: {exceptions}") + + return wrapper + + +def warm_up_bi_encoder( + embedding_model: EmbeddingModel, + non_blocking: bool = False, +) -> None: + warm_up_str = " ".join(WARM_UP_STRINGS) + + logger.debug(f"Warming up encoder model: {embedding_model.model_name}") + get_tokenizer( + model_name=embedding_model.model_name, + provider_type=embedding_model.provider_type, + ).encode(warm_up_str) + + def _warm_up() -> None: + try: + embedding_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY) + logger.debug( + f"Warm-up complete for encoder model: {embedding_model.model_name}" + ) + except Exception as e: + logger.warning( + f"Warm-up request failed for encoder model {embedding_model.model_name}: {e}" + ) + + if non_blocking: + threading.Thread(target=_warm_up, daemon=True).start() + logger.debug( + f"Started non-blocking warm-up for encoder model: {embedding_model.model_name}" + ) + else: + retry_encode = warm_up_retry(embedding_model.encode) + retry_encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY) + + +def warm_up_cross_encoder( + rerank_model_name: str, + non_blocking: bool = False, +) -> None: + logger.debug(f"Warming up reranking model: {rerank_model_name}") + + reranking_model = RerankingModel( + model_name=rerank_model_name, + provider_type=None, + api_key=None, + ) + + def _warm_up() -> None: + try: + reranking_model.predict(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:]) + logger.debug(f"Warm-up complete for reranking model: {rerank_model_name}") + except Exception as e: + logger.warning( + f"Warm-up request failed for reranking model {rerank_model_name}: {e}" + ) + + if non_blocking: + threading.Thread(target=_warm_up, daemon=True).start() + logger.debug( + f"Started non-blocking warm-up for reranking model: {rerank_model_name}" + ) + else: + retry_rerank = warm_up_retry(reranking_model.predict) + retry_rerank(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:]) diff --git a/backend/danswer/natural_language_processing/utils.py b/backend/danswer/natural_language_processing/utils.py new file mode 100644 index 00000000000..d2b9a7d7f1e --- /dev/null +++ b/backend/danswer/natural_language_processing/utils.py @@ -0,0 +1,150 @@ +import os +from abc import ABC +from abc import abstractmethod +from copy import copy + +from transformers import logging as transformer_logging # type:ignore + +from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE +from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL +from danswer.search.models import InferenceChunk +from danswer.utils.logger import setup_logger +from shared_configs.enums import EmbeddingProvider + +logger = setup_logger() +transformer_logging.set_verbosity_error() +os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" +os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1" + + +class BaseTokenizer(ABC): + @abstractmethod + def encode(self, string: str) -> list[int]: + pass + + @abstractmethod + def tokenize(self, string: str) -> list[str]: + pass + + @abstractmethod + def decode(self, tokens: list[int]) -> str: + pass + + +class TiktokenTokenizer(BaseTokenizer): + _instances: dict[str, "TiktokenTokenizer"] = {} + + def __new__(cls, encoding_name: str = "cl100k_base") -> "TiktokenTokenizer": + if encoding_name not in cls._instances: + cls._instances[encoding_name] = super(TiktokenTokenizer, cls).__new__(cls) + return 
cls._instances[encoding_name] + + def __init__(self, encoding_name: str = "cl100k_base"): + if not hasattr(self, "encoder"): + import tiktoken + + self.encoder = tiktoken.get_encoding(encoding_name) + + def encode(self, string: str) -> list[int]: + # this returns no special tokens + return self.encoder.encode_ordinary(string) + + def tokenize(self, string: str) -> list[str]: + return [self.encoder.decode([token]) for token in self.encode(string)] + + def decode(self, tokens: list[int]) -> str: + return self.encoder.decode(tokens) + + +class HuggingFaceTokenizer(BaseTokenizer): + def __init__(self, model_name: str): + from tokenizers import Tokenizer # type: ignore + + self.encoder = Tokenizer.from_pretrained(model_name) + + def encode(self, string: str) -> list[int]: + # this returns no special tokens + return self.encoder.encode(string, add_special_tokens=False).ids + + def tokenize(self, string: str) -> list[str]: + return self.encoder.encode(string, add_special_tokens=False).tokens + + def decode(self, tokens: list[int]) -> str: + return self.encoder.decode(tokens) + + +_TOKENIZER_CACHE: dict[str, BaseTokenizer] = {} + + +def _check_tokenizer_cache(tokenizer_name: str) -> BaseTokenizer: + global _TOKENIZER_CACHE + + if tokenizer_name not in _TOKENIZER_CACHE: + if tokenizer_name == "openai": + _TOKENIZER_CACHE[tokenizer_name] = TiktokenTokenizer("cl100k_base") + return _TOKENIZER_CACHE[tokenizer_name] + try: + logger.debug(f"Initializing HuggingFaceTokenizer for: {tokenizer_name}") + _TOKENIZER_CACHE[tokenizer_name] = HuggingFaceTokenizer(tokenizer_name) + except Exception as primary_error: + logger.error( + f"Error initializing HuggingFaceTokenizer for {tokenizer_name}: {primary_error}" + ) + logger.warning( + f"Falling back to default embedding model: {DOCUMENT_ENCODER_MODEL}" + ) + + try: + # Cache this tokenizer name to the default so we don't have to try to load it again + # and fail again + _TOKENIZER_CACHE[tokenizer_name] = HuggingFaceTokenizer( + DOCUMENT_ENCODER_MODEL + ) + except Exception as fallback_error: + logger.error( + f"Error initializing fallback HuggingFaceTokenizer: {fallback_error}" + ) + raise ValueError( + f"Failed to initialize tokenizer for {tokenizer_name} and fallback model" + ) from fallback_error + + return _TOKENIZER_CACHE[tokenizer_name] + + +_DEFAULT_TOKENIZER: BaseTokenizer = HuggingFaceTokenizer(DOCUMENT_ENCODER_MODEL) + + +def get_tokenizer( + model_name: str | None, provider_type: EmbeddingProvider | str | None +) -> BaseTokenizer: + # Currently all of the viable models use the same sentencepiece tokenizer + # OpenAI uses a different one but currently it's not supported due to quality issues + # the inconsistent chunking makes using the sentencepiece tokenizer default better for now + # LLM tokenizers are specified by strings + global _DEFAULT_TOKENIZER + return _DEFAULT_TOKENIZER + + +def tokenizer_trim_content( + content: str, desired_length: int, tokenizer: BaseTokenizer +) -> str: + tokens = tokenizer.encode(content) + if len(tokens) > desired_length: + content = tokenizer.decode(tokens[:desired_length]) + return content + + +def tokenizer_trim_chunks( + chunks: list[InferenceChunk], + tokenizer: BaseTokenizer, + max_chunk_toks: int = DOC_EMBEDDING_CONTEXT_SIZE, +) -> list[InferenceChunk]: + new_chunks = copy(chunks) + for ind, chunk in enumerate(new_chunks): + new_content = tokenizer_trim_content(chunk.content, max_chunk_toks, tokenizer) + if len(new_content) != len(chunk.content): + new_chunk = copy(chunk) + new_chunk.content = new_content + 
new_chunks[ind] = new_chunk + return new_chunks diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index 0b66aae166b..a5a0fe0dad5 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -9,9 +9,12 @@ from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes +from danswer.chat.models import DocumentRelevance from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse +from danswer.chat.models import RelevanceAnalysis from danswer.chat.models import StreamingError +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT from danswer.configs.chat_configs import QA_TIMEOUT from danswer.configs.constants import MessageType @@ -21,6 +24,7 @@ from danswer.db.chat import get_or_create_root_message from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.chat import translate_db_search_doc_to_server_search_doc +from danswer.db.chat import update_search_docs_table_with_relevance from danswer.db.engine import get_session_context_manager from danswer.db.models import User from danswer.db.persona import get_prompt_by_id @@ -32,16 +36,16 @@ from danswer.llm.answering.models import QuotesConfig from danswer.llm.factory import get_llms_for_persona from danswer.llm.factory import get_main_llm_from_tuple -from danswer.llm.utils import get_default_llm_token_encode +from danswer.natural_language_processing.utils import get_tokenizer from danswer.one_shot_answer.models import DirectQARequest from danswer.one_shot_answer.models import OneShotQAResponse from danswer.one_shot_answer.models import QueryRephrase from danswer.one_shot_answer.qa_utils import combine_message_thread +from danswer.search.enums import LLMEvaluationType from danswer.search.models import RerankMetricsContainer from danswer.search.models import RetrievalMetricsContainer from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.search.utils import dedupe_documents -from danswer.search.utils import drop_llm_indices from danswer.secondary_llm_flows.answer_validation import get_answer_validity from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase from danswer.server.query_and_chat.models import ChatMessageDetail @@ -57,6 +61,7 @@ from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time + logger = setup_logger() AnswerObjectIterator = Iterator[ @@ -70,6 +75,7 @@ | ChatMessageDetail | CitationInfo | ToolCallKickoff + | DocumentRelevance ] @@ -112,8 +118,12 @@ def stream_answer_objects( one_shot=True, danswerbot_flow=danswerbot_flow, ) + llm, fast_llm = get_llms_for_persona(persona=chat_session.persona) - llm_tokenizer = get_default_llm_token_encode() + llm_tokenizer = get_tokenizer( + model_name=llm.config.model_name, + provider_type=llm.config.model_provider, + ) # Create a chat session which will just store the root message, the query, and the AI response root_message = get_or_create_root_message( @@ -121,13 +131,16 @@ def stream_answer_objects( ) history_str = combine_message_thread( - messages=history, max_tokens=max_history_tokens + messages=history, + max_tokens=max_history_tokens, + llm_tokenizer=llm_tokenizer, ) - rephrased_query = thread_based_query_rephrase( + 
rephrased_query = query_req.query_override or thread_based_query_rephrase( user_query=query_msg.message, history_str=history_str, ) + # Given back ahead of the documents for latency reasons # In chat flow it's given back along with the documents yield QueryRephrase(rephrased_query=rephrased_query) @@ -152,13 +165,12 @@ def stream_answer_objects( parent_message=root_message, prompt_id=query_req.prompt_id, message=query_msg.message, - token_count=len(llm_tokenizer(query_msg.message)), + token_count=len(llm_tokenizer.encode(query_msg.message)), message_type=MessageType.USER, db_session=db_session, commit=True, ) - llm, fast_llm = get_llms_for_persona(persona=chat_session.persona) prompt_config = PromptConfig.from_model(prompt) document_pruning_config = DocumentPruningConfig( max_chunks=int( @@ -167,11 +179,14 @@ def stream_answer_objects( else default_num_chunks ), max_tokens=max_document_tokens, - use_sections=query_req.chunks_above > 0 or query_req.chunks_below > 0, ) + search_tool = SearchTool( db_session=db_session, user=user, + evaluation_type=LLMEvaluationType.SKIP + if DISABLE_LLM_DOC_RELEVANCE + else query_req.evaluation_type, persona=chat_session.persona, retrieval_options=query_req.retrieval_options, prompt_config=prompt_config, @@ -189,6 +204,7 @@ def stream_answer_objects( quotes_config=QuotesConfig() if not use_citations else None, document_pruning_config=document_pruning_config, ) + answer = Answer( question=query_msg.message, answer_style_config=answer_config, @@ -197,6 +213,7 @@ def stream_answer_objects( single_message_history=history_str, tools=[search_tool], force_use_tool=ForceUseTool( + force_use=True, tool_name=search_tool.name, args={"query": rephrased_query}, ), @@ -204,12 +221,15 @@ def stream_answer_objects( # tested quotes with tool calling too much yet skip_explicit_tool_calling=True, return_contexts=query_req.return_contexts, + skip_gen_ai_answer_generation=query_req.skip_gen_ai_answer_generation, ) + # won't be any ImageGenerationDisplay responses since that tool is never passed in - dropped_inds: list[int] = [] + for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): # for one-shot flow, don't currently do anything with these if isinstance(packet, ToolResponse): + # (likely fine that it comes after the initial creation of the search docs) if packet.id == SEARCH_RESPONSE_SUMMARY_ID: search_response_summary = cast(SearchResponseSummary, packet.response) @@ -242,19 +262,31 @@ def stream_answer_objects( recency_bias_multiplier=search_response_summary.recency_bias_multiplier, ) yield initial_response - elif packet.id == SECTION_RELEVANCE_LIST_ID: - chunk_indices = packet.response - - if reference_db_search_docs is not None and dropped_inds: - chunk_indices = drop_llm_indices( - llm_indices=chunk_indices, - search_docs=reference_db_search_docs, - dropped_indices=dropped_inds, - ) - yield LLMRelevanceFilterResponse(relevant_chunk_indices=packet.response) elif packet.id == SEARCH_DOC_CONTENT_ID: yield packet.response + + elif packet.id == SECTION_RELEVANCE_LIST_ID: + document_based_response = {} + + if packet.response is not None: + for evaluation in packet.response: + document_based_response[ + evaluation.document_id + ] = RelevanceAnalysis( + relevant=evaluation.relevant, content=evaluation.content + ) + + evaluation_response = DocumentRelevance( + relevance_summaries=document_based_response + ) + if reference_db_search_docs is not None: + update_search_docs_table_with_relevance( + db_session=db_session, + 
reference_db_search_docs=reference_db_search_docs, + relevance_summary=evaluation_response, + ) + yield evaluation_response else: yield packet @@ -264,7 +296,7 @@ def stream_answer_objects( parent_message=new_user_message, prompt_id=query_req.prompt_id, message=answer.llm_answer, - token_count=len(llm_tokenizer(answer.llm_answer)), + token_count=len(llm_tokenizer.encode(answer.llm_answer)), message_type=MessageType.ASSISTANT, error=None, reference_docs=reference_db_search_docs, @@ -275,7 +307,6 @@ msg_detail_response = translate_db_message_to_chat_message_detail( gen_ai_response_message ) - yield msg_detail_response @@ -295,7 +326,7 @@ def stream_search_answer( db_session=session, ) for obj in objects: - yield get_json_line(obj.dict()) + yield get_json_line(obj.model_dump()) def get_search_answer( diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py index 86819916430..d7e81975630 100644 --- a/backend/danswer/one_shot_answer/models.py +++ b/backend/danswer/one_shot_answer/models.py @@ -1,15 +1,15 @@ -from typing import Any - from pydantic import BaseModel from pydantic import Field -from pydantic import root_validator +from pydantic import model_validator from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import QADocsResponse from danswer.configs.constants import MessageType +from danswer.search.enums import LLMEvaluationType from danswer.search.models import ChunkContext +from danswer.search.models import RerankingDetails from danswer.search.models import RetrievalDetails @@ -19,7 +19,7 @@ class QueryRephrase(BaseModel): class ThreadMessage(BaseModel): message: str - sender: str | None + sender: str | None = None role: MessageType = MessageType.USER @@ -27,28 +27,32 @@ class DirectQARequest(ChunkContext): messages: list[ThreadMessage] prompt_id: int | None persona_id: int + multilingual_query_expansion: list[str] | None = None retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) - # This is to forcibly skip (or run) the step, if None it uses the system defaults - skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None + rerank_settings: RerankingDetails | None = None + evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED + chain_of_thought: bool = False return_contexts: bool = False - @root_validator - def check_chain_of_thought_and_prompt_id( - cls, values: dict[str, Any] - ) -> dict[str, Any]: - chain_of_thought = values.get("chain_of_thought") - prompt_id = values.get("prompt_id") + # allows the caller to specify the exact search query they want to use + # can be used if the message sent to the LLM / query should not be the same + # will also disable Thread-based Rewording if specified + query_override: str | None = None + + # If True, skips generating an AI response to the search query + skip_gen_ai_answer_generation: bool = False - if chain_of_thought and prompt_id is not None: + @model_validator(mode="after") + def check_chain_of_thought_and_prompt_id(self) -> "DirectQARequest": + if self.chain_of_thought and self.prompt_id is not None: raise ValueError( "If chain_of_thought is True, prompt_id must be None" "The chain of thought prompt is only for question " "answering and does not accept customizing."
) - return values + return self class OneShotQAResponse(BaseModel): diff --git a/backend/danswer/one_shot_answer/qa_utils.py b/backend/danswer/one_shot_answer/qa_utils.py index e912a915e2e..6fbad99eff1 100644 --- a/backend/danswer/one_shot_answer/qa_utils.py +++ b/backend/danswer/one_shot_answer/qa_utils.py @@ -1,8 +1,7 @@ -from collections.abc import Callable from collections.abc import Generator from danswer.configs.constants import MessageType -from danswer.llm.utils import get_default_llm_token_encode +from danswer.natural_language_processing.utils import BaseTokenizer from danswer.one_shot_answer.models import ThreadMessage from danswer.utils.logger import setup_logger @@ -18,7 +17,7 @@ def simulate_streaming_response(model_out: str) -> Generator[str, None, None]: def combine_message_thread( messages: list[ThreadMessage], max_tokens: int | None, - llm_tokenizer: Callable | None = None, + llm_tokenizer: BaseTokenizer, ) -> str: """Used to create a single combined message context from threads""" if not messages: @@ -26,8 +25,6 @@ message_strs: list[str] = [] total_token_count = 0 - if llm_tokenizer is None: - llm_tokenizer = get_default_llm_token_encode() for message in reversed(messages): if message.role == MessageType.USER: @@ -42,7 +39,7 @@ role_str = message.role.value.upper() msg_str = f"{role_str}:\n{message.message}" - message_token_count = len(llm_tokenizer(msg_str)) + message_token_count = len(llm_tokenizer.encode(msg_str)) if ( max_tokens is not None diff --git a/backend/danswer/prompts/agentic_evaluation.py b/backend/danswer/prompts/agentic_evaluation.py new file mode 100644 index 00000000000..546f40c7f8e --- /dev/null +++ b/backend/danswer/prompts/agentic_evaluation.py @@ -0,0 +1,44 @@ +AGENTIC_SEARCH_SYSTEM_PROMPT = """ +You are an expert at evaluating the relevance of a document to a search query. +Given a document and a search query, you determine whether the document is relevant to the user query. +You ALWAYS output the 3 sections described below and every section always begins with the same header line. +The "Chain of Thought" is to help you understand the document and query and their relevance to one another. +The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them. +The "Final Relevance Determination" is always a single True or False. + +You always output your response following these 3 sections: + +1. Chain of Thought: +Provide a chain of thought analysis considering: +- The main purpose and content of the document +- What the user is searching for +- How the document relates to the query +- Potential uses of the document for the given query +Be thorough, but avoid unnecessary repetition. Think step by step. + +2. Useful Analysis: +Summarize the contents of the document as it relates to the user query. +BE ABSOLUTELY AS CONCISE AS POSSIBLE. +If the document is not useful, briefly mention what the document is about. +Do NOT say whether this document is useful or not useful, ONLY provide the summary. +If referring to the document, prefer using "this" document over "the" document. + +3. Final Relevance Determination: +True or False +""" + +AGENTIC_SEARCH_USER_PROMPT = """ + +Document Title: {title}{optional_metadata} +``` +{content} +``` + +Query: +{query} + +Be sure to run through the 3 steps of evaluation: +1. Chain of Thought +2. Useful Analysis +3.
Final Relevance Determination +""".strip() diff --git a/backend/danswer/prompts/constants.py b/backend/danswer/prompts/constants.py index 40a37fc325e..d5734908537 100644 --- a/backend/danswer/prompts/constants.py +++ b/backend/danswer/prompts/constants.py @@ -7,7 +7,6 @@ ANSWER_PAT = "Answer:" ANSWERABLE_PAT = "Answerable:" FINAL_ANSWER_PAT = "Final Answer:" -UNCERTAINTY_PAT = "?" QUOTE_PAT = "Quote:" QUOTES_PAT_PLURAL = "Quotes:" INVALID_PAT = "Invalid:" diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index 64a704fa693..16768963931 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -7,7 +7,6 @@ from danswer.prompts.constants import GENERAL_SEP_PAT from danswer.prompts.constants import QUESTION_PAT from danswer.prompts.constants import THOUGHT_PAT -from danswer.prompts.constants import UNCERTAINTY_PAT ONE_SHOT_SYSTEM_PROMPT = """ @@ -66,9 +65,6 @@ } -ANSWER_NOT_FOUND_RESPONSE = f'{{"answer": "{UNCERTAINTY_PAT}", "quotes": []}}' - - # Default json prompt which can reference multiple docs and provide answer + quotes # system_like_header is similar to system message, can be user provided or defaults to QA_HEADER # context/history blocks are for context documents and conversation history, they can be blank diff --git a/backend/danswer/prompts/llm_chunk_filter.py b/backend/danswer/prompts/llm_chunk_filter.py index fe0b8d398db..2783ac11e22 100644 --- a/backend/danswer/prompts/llm_chunk_filter.py +++ b/backend/danswer/prompts/llm_chunk_filter.py @@ -5,12 +5,15 @@ USEFUL_PAT = "Yes useful" NONUSEFUL_PAT = "Not useful" SECTION_FILTER_PROMPT = f""" -Determine if the reference section is USEFUL for answering the user query. +Determine if the following section is USEFUL for answering the user query. It is NOT enough for the section to be related to the query, \ it must contain information that is USEFUL for answering the query. If the section contains ANY useful information, that is good enough, \ it does not need to fully answer the every part of the user query. 
+ +Title: {{title}} +{{optional_metadata}} Reference Section: ``` {{chunk_text}} diff --git a/backend/danswer/prompts/prompt_utils.py b/backend/danswer/prompts/prompt_utils.py index 6d7bddeec95..cd59e97061f 100644 --- a/backend/danswer/prompts/prompt_utils.py +++ b/backend/danswer/prompts/prompt_utils.py @@ -6,7 +6,6 @@ from danswer.chat.models import LlmDoc from danswer.configs.chat_configs import LANGUAGE_HINT -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.constants import DocumentSource from danswer.db.models import Prompt from danswer.llm.answering.models import PromptConfig @@ -56,7 +55,7 @@ def add_date_time_to_prompt(prompt_str: str) -> str: def build_task_prompt_reminders( prompt: Prompt | PromptConfig, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + use_language_hint: bool, citation_str: str = CITATION_REMINDER, language_hint_str: str = LANGUAGE_HINT, ) -> str: diff --git a/backend/danswer/search/enums.py b/backend/danswer/search/enums.py index 39908335522..28f81704789 100644 --- a/backend/danswer/search/enums.py +++ b/backend/danswer/search/enums.py @@ -4,13 +4,6 @@ from enum import Enum -class OptionalSearchSetting(str, Enum): - ALWAYS = "always" - NEVER = "never" - # Determine whether to run search based on history and latest query - AUTO = "auto" - - class RecencyBiasSetting(str, Enum): FAVOR_RECENT = "favor_recent" # 2x decay rate BASE_DECAY = "base_decay" @@ -19,17 +12,25 @@ class RecencyBiasSetting(str, Enum): AUTO = "auto" +class OptionalSearchSetting(str, Enum): + ALWAYS = "always" + NEVER = "never" + # Determine whether to run search based on history and latest query + AUTO = "auto" + + class SearchType(str, Enum): KEYWORD = "keyword" SEMANTIC = "semantic" - HYBRID = "hybrid" + + +class LLMEvaluationType(str, Enum): + AGENTIC = "agentic" # applies agentic evaluation + BASIC = "basic" # applies boolean evaluation + SKIP = "skip" # skips evaluation + UNSPECIFIED = "unspecified" # reverts to default class QueryFlow(str, Enum): SEARCH = "search" QUESTION_ANSWER = "question-answer" - - -class EmbedTextType(str, Enum): - QUERY = "query" - PASSAGE = "passage" diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index 6e16de2c75e..15387e6c63e 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -2,20 +2,20 @@ from typing import Any from pydantic import BaseModel -from pydantic import validator +from pydantic import ConfigDict +from pydantic import Field +from pydantic import field_validator -from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE -from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW -from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER -from danswer.configs.chat_configs import HYBRID_ALPHA -from danswer.configs.chat_configs import NUM_RERANKED_RESULTS from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.configs.constants import DocumentSource from danswer.db.models import Persona +from danswer.db.models import SearchSettings from danswer.indexing.models import BaseChunk +from danswer.indexing.models import IndexingSetting +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import SearchType -from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW +from shared_configs.enums import RerankerProvider MAX_METRICS_CONTENT = ( @@ -23,6 +23,67 @@ ) +class RerankingDetails(BaseModel): + # If model is None (or 
num_rerank is 0), then reranking is turned off + rerank_model_name: str | None + rerank_provider_type: RerankerProvider | None + rerank_api_key: str | None = None + + num_rerank: int + + # For faster flows where the results should start immediately + # this more time intensive step can be skipped + disable_rerank_for_streaming: bool = False + + @classmethod + def from_db_model(cls, search_settings: SearchSettings) -> "RerankingDetails": + return cls( + rerank_model_name=search_settings.rerank_model_name, + rerank_provider_type=search_settings.rerank_provider_type, + rerank_api_key=search_settings.rerank_api_key, + num_rerank=search_settings.num_rerank, + ) + + +class InferenceSettings(RerankingDetails): + # Empty for no additional expansion + multilingual_expansion: list[str] + + +class SearchSettingsCreationRequest(InferenceSettings, IndexingSetting): + @classmethod + def from_db_model( + cls, search_settings: SearchSettings + ) -> "SearchSettingsCreationRequest": + inference_settings = InferenceSettings.from_db_model(search_settings) + indexing_setting = IndexingSetting.from_db_model(search_settings) + + return cls(**inference_settings.dict(), **indexing_setting.dict()) + + +class SavedSearchSettings(InferenceSettings, IndexingSetting): + @classmethod + def from_db_model(cls, search_settings: SearchSettings) -> "SavedSearchSettings": + return cls( + # Indexing Setting + model_name=search_settings.model_name, + model_dim=search_settings.model_dim, + normalize=search_settings.normalize, + query_prefix=search_settings.query_prefix, + passage_prefix=search_settings.passage_prefix, + provider_type=search_settings.provider_type, + index_name=search_settings.index_name, + multipass_indexing=search_settings.multipass_indexing, + # Reranking Details + rerank_model_name=search_settings.rerank_model_name, + rerank_provider_type=search_settings.rerank_provider_type, + rerank_api_key=search_settings.rerank_api_key, + num_rerank=search_settings.num_rerank, + # Multilingual Expansion + multilingual_expansion=search_settings.multilingual_expansion, + ) + + class Tag(BaseModel): tag_key: str tag_value: str @@ -47,24 +108,24 @@ class ChunkMetric(BaseModel): class ChunkContext(BaseModel): - # Additional surrounding context options, if full doc, then chunks are deduped - # If surrounding context overlap, it is combined into one - chunks_above: int = CONTEXT_CHUNKS_ABOVE - chunks_below: int = CONTEXT_CHUNKS_BELOW + # If not specified (None), picked up from Persona settings if there is space + # if specified (even if 0), it always uses the specified number of chunks above and below + chunks_above: int | None = None + chunks_below: int | None = None full_doc: bool = False - @validator("chunks_above", "chunks_below", pre=True, each_item=False) + @field_validator("chunks_above", "chunks_below") + @classmethod def check_non_negative(cls, value: int, field: Any) -> int: - if value < 0: + if value is not None and value < 0: raise ValueError(f"{field.name} must be non-negative") return value class SearchRequest(ChunkContext): - """Input to the SearchPipeline.""" - query: str - search_type: SearchType = SearchType.HYBRID + + search_type: SearchType = SearchType.SEMANTIC human_selected_filters: BaseFilters | None = None enable_auto_detect_filters: bool | None = None @@ -74,32 +135,36 @@ class SearchRequest(ChunkContext): offset: int | None = None limit: int | None = None + multilingual_expansion: list[str] | None = None recency_bias_multiplier: float = 1.0 - hybrid_alpha: float = HYBRID_ALPHA - # This is to forcibly 
skip (or run) the step, if None it uses the system defaults - skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None - - class Config: - arbitrary_types_allowed = True + hybrid_alpha: float | None = None + rerank_settings: RerankingDetails | None = None + evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED + model_config = ConfigDict(arbitrary_types_allowed=True) class SearchQuery(ChunkContext): + "Processed Request that is directly passed to the SearchPipeline" query: str + processed_keywords: list[str] + search_type: SearchType + evaluation_type: LLMEvaluationType filters: IndexFilters + + # by this point, the chunks_above and chunks_below must be set + chunks_above: int + chunks_below: int + + rerank_settings: RerankingDetails | None + hybrid_alpha: float recency_bias_multiplier: float + + # Only used if LLM evaluation type is not skip, None to use default settings + max_llm_filter_sections: int + num_hits: int = NUM_RETURNED_HITS offset: int = 0 - search_type: SearchType = SearchType.HYBRID - skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW - skip_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER - # Only used if not skip_rerank - num_rerank: int | None = NUM_RERANKED_RESULTS - # Only used if not skip_llm_chunk_filter - max_llm_filter_sections: int = NUM_RERANKED_RESULTS - - class Config: - frozen = True + model_config = ConfigDict(frozen=True) class RetrievalDetails(ChunkContext): @@ -126,19 +191,24 @@ class InferenceChunk(BaseChunk): document_id: str source_type: DocumentSource semantic_identifier: str + title: str | None # Separate from Semantic Identifier though often same boost: int recency_bias: float score: float | None hidden: bool + is_relevant: bool | None = None + relevance_explanation: str | None = None metadata: dict[str, str | list[str]] # Matched sections in the chunk. Uses Vespa syntax e.g. TEXT # to specify that a set of words should be highlighted. For example: # ["the answer is 42", "he couldn't find an answer"] match_highlights: list[str] + # when the doc was last updated updated_at: datetime | None primary_owners: list[str] | None = None secondary_owners: list[str] | None = None + large_chunk_reference_ids: list[int] = Field(default_factory=list) @property def unique_id(self) -> str: @@ -189,6 +259,21 @@ def __gt__(self, other: Any) -> bool: return self.score > other.score +class InferenceChunkUncleaned(InferenceChunk): + metadata_suffix: str | None + + def to_inference_chunk(self) -> InferenceChunk: + # Create a dict of all fields except 'metadata_suffix' + # Assumes the cleaning has already been applied and just needs to translate to the right type + inference_chunk_data = { + k: v + for k, v in self.model_dump().items() + if k + not in ["metadata_suffix"] # May be other fields to throw out in the future + } + return InferenceChunk(**inference_chunk_data) + + class InferenceSection(BaseModel): """Section list of chunks with a combined content. 
A section could be a single chunk, several chunks from the same document or the entire document.""" @@ -202,7 +287,7 @@ class SearchDoc(BaseModel): document_id: str chunk_ind: int semantic_identifier: str - link: str | None + link: str | None = None blurb: str source_type: DocumentSource boost: int @@ -211,19 +296,21 @@ class SearchDoc(BaseModel): # be `True` when doing an admin search hidden: bool metadata: dict[str, str | list[str]] - score: float | None + score: float | None = None + is_relevant: bool | None = None + relevance_explanation: str | None = None # Matched sections in the doc. Uses Vespa syntax e.g. TEXT # to specify that a set of words should be highlighted. For example: # ["the answer is 42", "the answer is 42""] match_highlights: list[str] # when the doc was last updated - updated_at: datetime | None - primary_owners: list[str] | None - secondary_owners: list[str] | None + updated_at: datetime | None = None + primary_owners: list[str] | None = None + secondary_owners: list[str] | None = None is_internet: bool = False - def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore - initial_dict = super().dict(*args, **kwargs) # type: ignore + def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + initial_dict = super().model_dump(*args, **kwargs) # type: ignore initial_dict["updated_at"] = ( self.updated_at.isoformat() if self.updated_at else None ) @@ -241,7 +328,7 @@ def from_search_doc( """IMPORTANT: careful using this and not providing a db_doc_id If db_doc_id is not provided, it won't be able to actually fetch the saved doc and info later on. So only skip providing this if the SavedSearchDoc will not be used in the future""" - search_doc_data = search_doc.dict() + search_doc_data = search_doc.model_dump() search_doc_data["score"] = search_doc_data.get("score") or 0.0 return cls(**search_doc_data, db_doc_id=db_doc_id) diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py index 2d990c15a3f..ad3e19e149d 100644 --- a/backend/danswer/search/pipeline.py +++ b/backend/danswer/search/pipeline.py @@ -5,13 +5,18 @@ from sqlalchemy.orm import Session -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION -from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.chat.models import SectionRelevancePiece +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings from danswer.document_index.factory import get_default_document_index +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prune_and_merge import _merge_sections from danswer.llm.answering.prune_and_merge import ChunkRange from danswer.llm.answering.prune_and_merge import merge_chunk_intervals from danswer.llm.interfaces import LLM +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import QueryFlow from danswer.search.enums import SearchType from danswer.search.models import IndexFilters @@ -21,12 +26,17 @@ from danswer.search.models import RetrievalMetricsContainer from danswer.search.models import SearchQuery from danswer.search.models import SearchRequest +from danswer.search.postprocessing.postprocessing import cleanup_chunks from danswer.search.postprocessing.postprocessing import search_postprocessing from danswer.search.preprocessing.preprocessing 
import retrieval_preprocessing from danswer.search.retrieval.search_runner import retrieve_chunks from danswer.search.utils import inference_section_from_chunks +from danswer.search.utils import relevant_sections_to_indices +from danswer.secondary_llm_flows.agentic_evaluation import evaluate_inference_section from danswer.utils.logger import setup_logger -from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel +from danswer.utils.threadpool_concurrency import FunctionCall +from danswer.utils.threadpool_concurrency import run_functions_in_parallel +from danswer.utils.timing import log_function_time logger = setup_logger() @@ -40,9 +50,11 @@ def __init__( fast_llm: LLM, db_session: Session, bypass_acl: bool = False, # NOTE: VERY DANGEROUS, USE WITH CAUTION - retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] - | None = None, + retrieval_metrics_callback: ( + Callable[[RetrievalMetricsContainer], None] | None + ) = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, + prompt_config: PromptConfig | None = None, ): self.search_request = search_request self.user = user @@ -53,16 +65,16 @@ def __init__( self.retrieval_metrics_callback = retrieval_metrics_callback self.rerank_metrics_callback = rerank_metrics_callback - self.embedding_model = get_current_db_embedding_model(db_session) + self.search_settings = get_current_search_settings(db_session) self.document_index = get_default_document_index( - primary_index_name=self.embedding_model.index_name, + primary_index_name=self.search_settings.index_name, secondary_index_name=None, ) + self.prompt_config: PromptConfig | None = prompt_config # Preprocessing steps generate this self._search_query: SearchQuery | None = None self._predicted_search_type: SearchType | None = None - self._predicted_flow: QueryFlow | None = None # Initial document index retrieval chunks self._retrieved_chunks: list[InferenceChunk] | None = None @@ -71,21 +83,22 @@ def __init__( # Reranking and LLM section selection can be run together # If only LLM selection is on, the reranked chunks are yielded immediatly self._reranked_sections: list[InferenceSection] | None = None - self._relevant_section_indices: list[int] | None = None + self._final_context_sections: list[InferenceSection] | None = None + + self._section_relevance: list[SectionRelevancePiece] | None = None # Generates reranked chunks and LLM selections - self._postprocessing_generator: Iterator[ - list[InferenceSection] | list[int] - ] | None = None + self._postprocessing_generator: ( + Iterator[list[InferenceSection] | list[SectionRelevancePiece]] | None + ) = None + + # No longer computed but keeping around in case it's reintroduced later + self._predicted_flow: QueryFlow | None = QueryFlow.QUESTION_ANSWER """Pre-processing""" def _run_preprocessing(self) -> None: - ( - final_search_query, - predicted_search_type, - predicted_flow, - ) = retrieval_preprocessing( + final_search_query = retrieval_preprocessing( search_request=self.search_request, user=self.user, llm=self.llm, @@ -93,8 +106,7 @@ def _run_preprocessing(self) -> None: bypass_acl=self.bypass_acl, ) self._search_query = final_search_query - self._predicted_search_type = predicted_search_type - self._predicted_flow = predicted_flow + self._predicted_search_type = final_search_query.search_type @property def search_query(self) -> SearchQuery: @@ -124,24 +136,20 @@ def predicted_flow(self) -> QueryFlow: """Retrieval and Postprocessing""" def _get_chunks(self) -> 
list[InferenceChunk]: - """TODO as a future extension: - If large chunks (above 512 tokens) are used which cannot be directly fed to the LLM, - This step should run the two retrievals to get all of the base size chunks - """ if self._retrieved_chunks is not None: return self._retrieved_chunks + # These chunks do not include large chunks and have been deduped self._retrieved_chunks = retrieve_chunks( query=self.search_query, document_index=self.document_index, db_session=self.db_session, - hybrid_alpha=self.search_request.hybrid_alpha, - multilingual_expansion_str=MULTILINGUAL_QUERY_EXPANSION, retrieval_metrics_callback=self.retrieval_metrics_callback, ) return cast(list[InferenceChunk], self._retrieved_chunks) + @log_function_time(print_only=True) def _get_sections(self) -> list[InferenceSection]: """Returns an expanded section from each of the chunks. If whole docs (instead of above/below context) is specified then it will give back all of the whole docs @@ -152,48 +160,50 @@ def _get_sections(self) -> list[InferenceSection]: if self._retrieved_sections is not None: return self._retrieved_sections + # These chunks are ordered, deduped, and contain no large chunks retrieved_chunks = self._get_chunks() above = self.search_query.chunks_above below = self.search_query.chunks_below - functions_with_args: list[tuple[Callable, tuple]] = [] expanded_inference_sections = [] + inference_chunks: list[InferenceChunk] = [] + chunk_requests: list[VespaChunkRequest] = [] # Full doc setting takes priority if self.search_query.full_doc: seen_document_ids = set() - unique_chunks = [] + # This preserves the ordering since the chunks are retrieved in score order for chunk in retrieved_chunks: if chunk.document_id not in seen_document_ids: seen_document_ids.add(chunk.document_id) - unique_chunks.append(chunk) - - functions_with_args.append( - ( - self.document_index.id_based_retrieval, - ( - chunk.document_id, - None, # Start chunk ind - None, # End chunk ind - # There is no chunk level permissioning, this expansion around chunks - # can be assumed to be safe - IndexFilters(access_control_list=None), - ), + chunk_requests.append( + VespaChunkRequest( + document_id=chunk.document_id, ) ) - list_inference_chunks = run_functions_tuples_in_parallel( - functions_with_args, allow_failures=False + inference_chunks.extend( + cleanup_chunks( + self.document_index.id_based_retrieval( + chunk_requests=chunk_requests, + filters=IndexFilters(access_control_list=None), + ) + ) ) - for ind, chunk in enumerate(unique_chunks): - inf_chunks = list_inference_chunks[ind] + # Create a dictionary to group chunks by document_id + grouped_inference_chunks: dict[str, list[InferenceChunk]] = {} + for chunk in inference_chunks: + if chunk.document_id not in grouped_inference_chunks: + grouped_inference_chunks[chunk.document_id] = [] + grouped_inference_chunks[chunk.document_id].append(chunk) + for chunk_group in grouped_inference_chunks.values(): inference_section = inference_section_from_chunks( - center_chunk=chunk, - chunks=inf_chunks, + center_chunk=chunk_group[0], + chunks=chunk_group, ) if inference_section is not None: @@ -228,36 +238,36 @@ def _get_sections(self) -> list[InferenceSection]: merged_ranges = [ merge_chunk_intervals(ranges) for ranges in doc_chunk_ranges_map.values() ] - flat_ranges = [r for ranges in merged_ranges for r in ranges] + + flat_ranges: list[ChunkRange] = [r for ranges in merged_ranges for r in ranges] for chunk_range in flat_ranges: - functions_with_args.append( - ( - # If Large Chunks are introduced, 
additional filters need to be added here - self.document_index.id_based_retrieval, - ( - # Only need the document_id here, just use any chunk in the range is fine - chunk_range.chunks[0].document_id, - chunk_range.start, - chunk_range.end, - # There is no chunk level permissioning, this expansion around chunks - # can be assumed to be safe - IndexFilters(access_control_list=None), - ), + # Don't need to fetch chunks within range for merging if chunks_above / chunks_below are 0. + if above == below == 0: + inference_chunks.extend(chunk_range.chunks) + + else: + chunk_requests.append( + VespaChunkRequest( + document_id=chunk_range.chunks[0].document_id, + min_chunk_ind=chunk_range.start, + max_chunk_ind=chunk_range.end, + ) ) - ) - # list of list of inference chunks where the inner list needs to be combined for content - list_inference_chunks = run_functions_tuples_in_parallel( - functions_with_args, allow_failures=False - ) - flattened_inference_chunks = [ - chunk for sublist in list_inference_chunks for chunk in sublist - ] + if chunk_requests: + inference_chunks.extend( + cleanup_chunks( + self.document_index.id_based_retrieval( + chunk_requests=chunk_requests, + filters=IndexFilters(access_control_list=None), + batch_retrieval=True, + ) + ) + ) doc_chunk_ind_to_chunk = { - (chunk.document_id, chunk.chunk_id): chunk - for chunk in flattened_inference_chunks + (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks } # Build the surroundings for all of the initial retrieved chunks @@ -314,18 +324,71 @@ def reranked_sections(self) -> list[InferenceSection]: return self._reranked_sections @property - def relevant_section_indices(self) -> list[int]: - if self._relevant_section_indices is not None: - return self._relevant_section_indices + def final_context_sections(self) -> list[InferenceSection]: + if self._final_context_sections is not None: + return self._final_context_sections - self._relevant_section_indices = next( - cast(Iterator[list[int]], self._postprocessing_generator) - ) - return self._relevant_section_indices + self._final_context_sections = _merge_sections(sections=self.reranked_sections) + return self._final_context_sections + + @property + def section_relevance(self) -> list[SectionRelevancePiece] | None: + if self._section_relevance is not None: + return self._section_relevance + + if ( + self.search_query.evaluation_type == LLMEvaluationType.SKIP + or DISABLE_LLM_DOC_RELEVANCE + ): + return None + + if self.search_query.evaluation_type == LLMEvaluationType.UNSPECIFIED: + raise ValueError( + "Attempted to access section relevance scores on search query with evaluation type `UNSPECIFIED`." + + "The search query evaluation type should have been specified." + ) + + if self.search_query.evaluation_type == LLMEvaluationType.AGENTIC: + sections = self.final_context_sections + functions = [ + FunctionCall( + evaluate_inference_section, + (section, self.search_query.query, self.llm), + ) + for section in sections + ] + try: + results = run_functions_in_parallel(function_calls=functions) + self._section_relevance = list(results.values()) + except Exception: + raise ValueError( + "An issue occurred during the agentic evaluation process." + ) + + elif self.search_query.evaluation_type == LLMEvaluationType.BASIC: + if DISABLE_LLM_DOC_RELEVANCE: + raise ValueError( + "Basic search evaluation operation called while DISABLE_LLM_DOC_RELEVANCE is enabled."
+ ) + self._section_relevance = next( + cast( + Iterator[list[SectionRelevancePiece]], + self._postprocessing_generator, + ) + ) + + else: + # All other cases should have been handled above + raise ValueError( + f"Unexpected evaluation type: {self.search_query.evaluation_type}" + ) + + return self._section_relevance @property def section_relevance_list(self) -> list[bool]: - return [ - True if ind in self.relevant_section_indices else False - for ind in range(len(self.reranked_sections)) - ] + llm_indices = relevant_sections_to_indices( + relevance_sections=self.section_relevance, + items=self.final_context_sections, + ) + return [ind in llm_indices for ind in range(len(self.final_context_sections))] diff --git a/backend/danswer/search/postprocessing/postprocessing.py b/backend/danswer/search/postprocessing/postprocessing.py index b457549917f..6a3d2dc2dcd 100644 --- a/backend/danswer/search/postprocessing/postprocessing.py +++ b/backend/danswer/search/postprocessing/postprocessing.py @@ -4,20 +4,24 @@ import numpy +from danswer.chat.models import SectionRelevancePiece +from danswer.configs.app_configs import BLURB_SIZE +from danswer.configs.constants import RETURN_SEPARATOR from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MIN from danswer.document_index.document_index_utils import ( translate_boost_count_to_multiplier, ) from danswer.llm.interfaces import LLM +from danswer.natural_language_processing.search_nlp_models import RerankingModel +from danswer.search.enums import LLMEvaluationType from danswer.search.models import ChunkMetric from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceChunkUncleaned from danswer.search.models import InferenceSection from danswer.search.models import MAX_METRICS_CONTENT from danswer.search.models import RerankMetricsContainer from danswer.search.models import SearchQuery -from danswer.search.models import SearchType -from danswer.search.search_nlp_models import CrossEncoderEnsembleModel from danswer.secondary_llm_flows.chunk_usefulness import llm_batch_eval_sections from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import FunctionCall @@ -35,21 +39,45 @@ def _log_top_section_links(search_flow: str, sections: list[InferenceSection]) - else "No Link" for section in sections ] - logger.info(f"Top links from {search_flow} search: {', '.join(top_links)}") + logger.debug(f"Top links from {search_flow} search: {', '.join(top_links)}") -def should_rerank(query: SearchQuery) -> bool: - # Don't re-rank for keyword search - return query.search_type != SearchType.KEYWORD and not query.skip_rerank +def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]: + def _remove_title(chunk: InferenceChunkUncleaned) -> str: + if not chunk.title or not chunk.content: + return chunk.content + if chunk.content.startswith(chunk.title): + return chunk.content[len(chunk.title) :].lstrip() -def should_apply_llm_based_relevance_filter(query: SearchQuery) -> bool: - return not query.skip_llm_chunk_filter + # BLURB SIZE is by token instead of char but each token is at least 1 char + # If this prefix matches the content, it's assumed the title was prepended + if chunk.content.startswith(chunk.title[:BLURB_SIZE]): + return ( + chunk.content.split(RETURN_SEPARATOR, 1)[-1] + if RETURN_SEPARATOR in chunk.content + else chunk.content + ) + + return chunk.content + + def _remove_metadata_suffix(chunk: 
InferenceChunkUncleaned) -> str: + if not chunk.metadata_suffix: + return chunk.content + return chunk.content.removesuffix(chunk.metadata_suffix).rstrip( + RETURN_SEPARATOR + ) + + for chunk in chunks: + chunk.content = _remove_title(chunk) + chunk.content = _remove_metadata_suffix(chunk) + + return [chunk.to_inference_chunk() for chunk in chunks] @log_function_time(print_only=True) def semantic_reranking( - query: str, + query: SearchQuery, chunks: list[InferenceChunk], model_min: int = CROSS_ENCODER_RANGE_MIN, model_max: int = CROSS_ENCODER_RANGE_MAX, @@ -60,11 +88,28 @@ def semantic_reranking( Note: this updates the chunks in place, it updates the chunk scores which came from retrieval """ - cross_encoders = CrossEncoderEnsembleModel() - passages = [chunk.content for chunk in chunks] - sim_scores_floats = cross_encoders.predict(query=query, passages=passages) + rerank_settings = query.rerank_settings + + if not rerank_settings or not rerank_settings.rerank_model_name: + # Should never reach this part of the flow without reranking settings + raise RuntimeError("Reranking flow should not be running") + + chunks_to_rerank = chunks[: rerank_settings.num_rerank] + + cross_encoder = RerankingModel( + model_name=rerank_settings.rerank_model_name, + provider_type=rerank_settings.rerank_provider_type, + api_key=rerank_settings.rerank_api_key, + ) + + passages = [ + f"{chunk.semantic_identifier or chunk.title or ''}\n{chunk.content}" + for chunk in chunks_to_rerank + ] + sim_scores_floats = cross_encoder.predict(query=query.query, passages=passages) - sim_scores = [numpy.array(scores) for scores in sim_scores_floats] + # Old logic to handle multiple cross-encoders preserved but not used + sim_scores = [numpy.array(sim_scores_floats)] raw_sim_scores = cast(numpy.ndarray, sum(sim_scores) / len(sim_scores)) @@ -74,15 +119,17 @@ def semantic_reranking( [enc_n_scores - cross_models_min for enc_n_scores in sim_scores] ) / len(sim_scores) - boosts = [translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks] - recency_multiplier = [chunk.recency_bias for chunk in chunks] + boosts = [ + translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks_to_rerank + ] + recency_multiplier = [chunk.recency_bias for chunk in chunks_to_rerank] boosted_sim_scores = shifted_sim_scores * boosts * recency_multiplier normalized_b_s_scores = (boosted_sim_scores + cross_models_min - model_min) / ( model_max - model_min ) orig_indices = [i for i in range(len(normalized_b_s_scores))] scored_results = list( - zip(normalized_b_s_scores, raw_sim_scores, chunks, orig_indices) + zip(normalized_b_s_scores, raw_sim_scores, chunks_to_rerank, orig_indices) ) scored_results.sort(key=lambda x: x[0], reverse=True) ranked_sim_scores, ranked_raw_scores, ranked_chunks, ranked_indices = zip( @@ -133,12 +180,16 @@ def rerank_sections( """ chunks_to_rerank = [section.center_chunk for section in sections_to_rerank] + if not query.rerank_settings: + # Should never reach this part of the flow without reranking settings + raise RuntimeError("Reranking settings not found") + ranked_chunks, _ = semantic_reranking( - query=query.query, - chunks=chunks_to_rerank[: query.num_rerank], + query=query, + chunks=chunks_to_rerank, rerank_metrics_callback=rerank_metrics_callback, ) - lower_chunks = chunks_to_rerank[query.num_rerank :] + lower_chunks = chunks_to_rerank[query.rerank_settings.num_rerank :] # Scores from rerank cannot be meaningfully combined with scores without rerank # However the ordering is still important @@ -172,11 
+223,17 @@ def filter_sections( section.center_chunk.content if use_chunk else section.combined_content for section in sections_to_filter ] + metadata_list = [section.center_chunk.metadata for section in sections_to_filter] + titles = [ + section.center_chunk.semantic_identifier for section in sections_to_filter + ] llm_chunk_selection = llm_batch_eval_sections( query=query.query, section_contents=contents, llm=llm, + titles=titles, + metadata_list=metadata_list, ) return [ @@ -191,12 +248,22 @@ def search_postprocessing( retrieved_sections: list[InferenceSection], llm: LLM, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, -) -> Iterator[list[InferenceSection] | list[int]]: +) -> Iterator[list[InferenceSection] | list[SectionRelevancePiece]]: post_processing_tasks: list[FunctionCall] = [] + if not retrieved_sections: + # Avoids trying to rerank an empty list which throws an error + yield [] + yield [] + return + rerank_task_id = None sections_yielded = False - if should_rerank(search_query): + if ( + search_query.rerank_settings + and search_query.rerank_settings.rerank_model_name + and search_query.rerank_settings.num_rerank > 0 + ): post_processing_tasks.append( FunctionCall( rerank_sections, @@ -217,7 +284,10 @@ def search_postprocessing( sections_yielded = True llm_filter_task_id = None - if should_apply_llm_based_relevance_filter(search_query): + if search_query.evaluation_type in [ + LLMEvaluationType.BASIC, + LLMEvaluationType.UNSPECIFIED, + ]: post_processing_tasks.append( FunctionCall( filter_sections, @@ -248,17 +318,21 @@ def search_postprocessing( _log_top_section_links(search_query.search_type.value, reranked_sections) yield reranked_sections - llm_section_selection = cast( - list[str] | None, - post_processing_results.get(str(llm_filter_task_id)) + llm_selected_section_ids = ( + [ + section.center_chunk.unique_id + for section in post_processing_results.get(str(llm_filter_task_id), []) + ] if llm_filter_task_id - else None, + else [] ) - if llm_section_selection is not None: - yield [ - index - for index, section in enumerate(reranked_sections or retrieved_sections) - if section.center_chunk.unique_id in llm_section_selection - ] - else: - yield cast(list[int], []) + + yield [ + SectionRelevancePiece( + document_id=section.center_chunk.document_id, + chunk_id=section.center_chunk.chunk_id, + relevant=section.center_chunk.unique_id in llm_selected_section_ids, + content="", + ) + for section in (reranked_sections or retrieved_sections) + ] diff --git a/backend/danswer/search/postprocessing/reranker.py b/backend/danswer/search/postprocessing/reranker.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/search/preprocessing/danswer_helper.py b/backend/danswer/search/preprocessing/danswer_helper.py deleted file mode 100644 index 88e465dacb5..00000000000 --- a/backend/danswer/search/preprocessing/danswer_helper.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import TYPE_CHECKING - -from danswer.search.enums import QueryFlow -from danswer.search.models import SearchType -from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation -from danswer.search.search_nlp_models import get_default_tokenizer -from danswer.search.search_nlp_models import IntentModel -from danswer.server.query_and_chat.models import HelperResponse -from danswer.utils.logger import setup_logger - -logger = setup_logger() - -if TYPE_CHECKING: - from transformers import AutoTokenizer # type:ignore - - -def count_unk_tokens(text: 
str, tokenizer: "AutoTokenizer") -> int: - """Unclear if the wordpiece tokenizer used is actually tokenizing anything as the [UNK] token - It splits up even foreign characters and unicode emojis without using UNK""" - tokenized_text = tokenizer.tokenize(text) - num_unk_tokens = len( - [token for token in tokenized_text if token == tokenizer.unk_token] - ) - logger.debug(f"Total of {num_unk_tokens} UNKNOWN tokens found") - return num_unk_tokens - - -def query_intent(query: str) -> tuple[SearchType, QueryFlow]: - intent_model = IntentModel() - class_probs = intent_model.predict(query) - keyword = class_probs[0] - semantic = class_probs[1] - qa = class_probs[2] - - # Heavily bias towards QA, from user perspective, answering a statement is not as bad as not answering a question - if qa > 20: - # If one class is very certain, choose it still - if keyword > 70: - predicted_search = SearchType.KEYWORD - predicted_flow = QueryFlow.SEARCH - elif semantic > 70: - predicted_search = SearchType.SEMANTIC - predicted_flow = QueryFlow.SEARCH - # If it's a QA question, it must be a "Semantic" style statement/question - else: - predicted_search = SearchType.SEMANTIC - predicted_flow = QueryFlow.QUESTION_ANSWER - # If definitely not a QA question, choose between keyword or semantic search - elif keyword > semantic: - predicted_search = SearchType.KEYWORD - predicted_flow = QueryFlow.SEARCH - else: - predicted_search = SearchType.SEMANTIC - predicted_flow = QueryFlow.SEARCH - - logger.debug(f"Predicted Search: {predicted_search}") - logger.debug(f"Predicted Flow: {predicted_flow}") - return predicted_search, predicted_flow - - -def recommend_search_flow( - query: str, - model_name: str, - keyword: bool = False, - max_percent_stopwords: float = 0.30, # ~Every third word max, ie "effects of caffeine" still viable keyword search -) -> HelperResponse: - heuristic_search_type: SearchType | None = None - message: str | None = None - - # Heuristics based decisions - words = query.split() - non_stopwords = remove_stop_words_and_punctuation(query) - non_stopword_percent = len(non_stopwords) / len(words) - - # UNK tokens -> suggest Keyword (still may be valid QA) - if count_unk_tokens(query, get_default_tokenizer(model_name=model_name)) > 0: - if not keyword: - heuristic_search_type = SearchType.KEYWORD - message = "Unknown tokens in query." - - # Too many stop words, most likely a Semantic query (still may be valid QA) - if non_stopword_percent < 1 - max_percent_stopwords: - if keyword: - heuristic_search_type = SearchType.SEMANTIC - message = "Stopwords in query" - - # Model based decisions - model_search_type, flow = query_intent(query) - if not message: - if model_search_type == SearchType.SEMANTIC and keyword: - message = "Intent model classified Semantic Search" - if model_search_type == SearchType.KEYWORD and not keyword: - message = "Intent model classified Keyword Search." 
- - return HelperResponse( - values={ - "flow": flow, - "search_type": model_search_type - if heuristic_search_type is None - else heuristic_search_type, - }, - details=[message] if message else [], - ) diff --git a/backend/danswer/search/preprocessing/preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py index bb2449efe52..43a6a43ce88 100644 --- a/backend/danswer/search/preprocessing/preprocessing.py +++ b/backend/danswer/search/preprocessing/preprocessing.py @@ -1,32 +1,44 @@ from sqlalchemy.orm import Session from danswer.configs.chat_configs import BASE_RECENCY_DECAY -from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER +from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE +from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER +from danswer.configs.chat_configs import HYBRID_ALPHA +from danswer.configs.chat_configs import HYBRID_ALPHA_KEYWORD +from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings from danswer.llm.interfaces import LLM -from danswer.search.enums import QueryFlow +from danswer.natural_language_processing.search_nlp_models import QueryAnalysisModel +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import RecencyBiasSetting +from danswer.search.enums import SearchType from danswer.search.models import BaseFilters from danswer.search.models import IndexFilters +from danswer.search.models import RerankingDetails from danswer.search.models import SearchQuery from danswer.search.models import SearchRequest -from danswer.search.models import SearchType from danswer.search.preprocessing.access_filters import build_access_filters_for_user -from danswer.search.preprocessing.danswer_helper import query_intent +from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation from danswer.secondary_llm_flows.source_filter import extract_source_filter from danswer.secondary_llm_flows.time_filter import extract_time_filter from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import FunctionCall from danswer.utils.threadpool_concurrency import run_functions_in_parallel from danswer.utils.timing import log_function_time -from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW logger = setup_logger() +def query_analysis(query: str) -> tuple[bool, list[str]]: + analysis_model = QueryAnalysisModel() + return analysis_model.predict(query) + + @log_function_time(print_only=True) def retrieval_preprocessing( search_request: SearchRequest, @@ -34,11 +46,10 @@ def retrieval_preprocessing( llm: LLM, db_session: Session, bypass_acl: bool = False, - include_query_intent: bool = True, - disable_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER, + skip_query_analysis: bool = False, base_recency_decay: float = BASE_RECENCY_DECAY, favor_recent_decay_multiplier: float = FAVOR_RECENT_DECAY_MULTIPLIER, -) -> tuple[SearchQuery, SearchType | None, QueryFlow | None]: +) -> SearchQuery: """Logic is as follows: Any global disables apply first Then any filters or settings as part of the query are used @@ -68,6 +79,8 @@ def retrieval_preprocessing( logger.debug("Persona disables auto detect filters") auto_detect_time_filter = False auto_detect_source_filter = 
False + else: + logger.debug("Auto detect filters enabled") if ( time_filter is not None @@ -95,10 +108,8 @@ def retrieval_preprocessing( else None ) - # NOTE: this isn't really part of building the retrieval request, but is done here - # so it can be simply done in parallel with the filters without multi-level multithreading - run_query_intent = ( - FunctionCall(query_intent, (query,), {}) if include_query_intent else None + run_query_analysis = ( + None if skip_query_analysis else FunctionCall(query_analysis, (query,), {}) ) functions_to_run = [ @@ -106,7 +117,7 @@ def retrieval_preprocessing( for filter_fn in [ run_time_filters, run_source_filters, - run_query_intent, + run_query_analysis, ] if filter_fn ] @@ -120,12 +131,23 @@ def retrieval_preprocessing( predicted_source_filters = ( parallel_results[run_source_filters.result_id] if run_source_filters else None ) - predicted_search_type, predicted_flow = ( - parallel_results[run_query_intent.result_id] - if run_query_intent + + # The extracted keywords right now are not very reliable, not using for now + # Can maybe use for highlighting + is_keyword, extracted_keywords = ( + parallel_results[run_query_analysis.result_id] + if run_query_analysis else (None, None) ) + all_query_terms = query.split() + processed_keywords = ( + remove_stop_words_and_punctuation(all_query_terms) + # If the user is using a different language, don't edit the query or remove english stopwords + if not search_request.multilingual_expansion + else all_query_terms + ) + user_acl_filters = ( None if bypass_acl else build_access_filters_for_user(user, db_session) ) @@ -137,22 +159,32 @@ def retrieval_preprocessing( access_control_list=user_acl_filters, ) - llm_chunk_filter = False - if search_request.skip_llm_chunk_filter is not None: - llm_chunk_filter = not search_request.skip_llm_chunk_filter - elif persona: - llm_chunk_filter = persona.llm_relevance_filter + llm_evaluation_type = LLMEvaluationType.BASIC + if search_request.evaluation_type is not LLMEvaluationType.UNSPECIFIED: + llm_evaluation_type = search_request.evaluation_type - if disable_llm_chunk_filter: - if llm_chunk_filter: + elif persona: + llm_evaluation_type = ( + LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP + ) + + if DISABLE_LLM_DOC_RELEVANCE: + if llm_evaluation_type: logger.info( "LLM chunk filtering would have run but has been globally disabled" ) - llm_chunk_filter = False + llm_evaluation_type = LLMEvaluationType.SKIP + + rerank_settings = search_request.rerank_settings + # If not explicitly specified by the query, use the current settings + if rerank_settings is None: + search_settings = get_current_search_settings(db_session) - skip_rerank = search_request.skip_rerank - if skip_rerank is None: - skip_rerank = not ENABLE_RERANKING_REAL_TIME_FLOW + # For non-streaming flows, the rerank settings are applied at the search_request level + if not search_settings.disable_rerank_for_streaming: + rerank_settings = RerankingDetails.from_db_model(search_settings) # Decays at 1 / (1 + (multiplier * num years)) if persona and persona.recency_bias == RecencyBiasSetting.NO_DECAY: @@ -167,20 +199,42 @@ def retrieval_preprocessing( else: recency_bias_multiplier = base_recency_decay - return ( - SearchQuery( - query=query, - search_type=persona.search_type if persona else SearchType.HYBRID, - filters=final_filters, - recency_bias_multiplier=recency_bias_multiplier, - num_hits=limit if limit is not None else NUM_RETURNED_HITS, - offset=offset or 0, - 
skip_rerank=skip_rerank, - skip_llm_chunk_filter=not llm_chunk_filter, - chunks_above=search_request.chunks_above, - chunks_below=search_request.chunks_below, - full_doc=search_request.full_doc, - ), - predicted_search_type, - predicted_flow, + hybrid_alpha = HYBRID_ALPHA_KEYWORD if is_keyword else HYBRID_ALPHA + if search_request.hybrid_alpha: + hybrid_alpha = search_request.hybrid_alpha + + # Search request overrides anything else as it's explicitly set by the request + # If not explicitly specified, use the persona settings if they exist + # Otherwise, use the global defaults + chunks_above = ( + search_request.chunks_above + if search_request.chunks_above is not None + else (persona.chunks_above if persona else CONTEXT_CHUNKS_ABOVE) + ) + chunks_below = ( + search_request.chunks_below + if search_request.chunks_below is not None + else (persona.chunks_below if persona else CONTEXT_CHUNKS_BELOW) + ) + + return SearchQuery( + query=query, + processed_keywords=processed_keywords, + search_type=SearchType.KEYWORD if is_keyword else SearchType.SEMANTIC, + evaluation_type=llm_evaluation_type, + filters=final_filters, + hybrid_alpha=hybrid_alpha, + recency_bias_multiplier=recency_bias_multiplier, + num_hits=limit if limit is not None else NUM_RETURNED_HITS, + offset=offset or 0, + rerank_settings=rerank_settings, + # Match the number of sections the LLM filters to the number reranked; this is understood as the number of results + # the user wants heavier processing on, so apply the same limit to the LLM filter when reranking is on. + # If no reranking settings are set, fall back to the global default + max_llm_filter_sections=rerank_settings.num_rerank + if rerank_settings + else NUM_POSTPROCESSED_RESULTS, + chunks_above=chunks_above, + chunks_below=chunks_below, + full_doc=search_request.full_doc, ) diff --git a/backend/danswer/search/retrieval/search_runner.py b/backend/danswer/search/retrieval/search_runner.py index 3313d243942..31582f90819 100644 --- a/backend/danswer/search/retrieval/search_runner.py +++ b/backend/danswer/search/retrieval/search_runner.py @@ -7,20 +7,23 @@ from nltk.tokenize import word_tokenize # type:ignore from sqlalchemy.orm import Session -from danswer.configs.chat_configs import HYBRID_ALPHA -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION -from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_multilingual_expansion from danswer.document_index.interfaces import DocumentIndex -from danswer.search.enums import EmbedTextType +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.vespa.shared_utils.utils import ( + replace_invalid_doc_id_characters, +) +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel from danswer.search.models import ChunkMetric from danswer.search.models import IndexFilters from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceChunkUncleaned from danswer.search.models import InferenceSection from danswer.search.models import MAX_METRICS_CONTENT from danswer.search.models import RetrievalMetricsContainer from danswer.search.models import SearchQuery -from danswer.search.models import SearchType -from danswer.search.search_nlp_models import EmbeddingModel +from danswer.search.postprocessing.postprocessing import cleanup_chunks from danswer.search.utils import inference_section_from_chunks from 
danswer.secondary_llm_flows.query_expansion import multilingual_query_expansion from danswer.utils.logger import setup_logger @@ -28,6 +31,7 @@ from danswer.utils.timing import log_function_time from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.enums import EmbedTextType logger = setup_logger() @@ -53,19 +57,24 @@ def download_nltk_data() -> None: logger.error(f"Failed to download {resource_name}. Error: {e}") -def lemmatize_text(text: str) -> list[str]: +def lemmatize_text(keywords: list[str]) -> list[str]: try: + query = " ".join(keywords) lemmatizer = WordNetLemmatizer() - word_tokens = word_tokenize(text) - return [lemmatizer.lemmatize(word) for word in word_tokens] + word_tokens = word_tokenize(query) + lemmatized_words = [lemmatizer.lemmatize(word) for word in word_tokens] + combined_keywords = list(set(keywords + lemmatized_words)) + return combined_keywords except Exception: - return text.split(" ") + return keywords -def remove_stop_words_and_punctuation(text: str) -> list[str]: +def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]: try: + # Re-tokenize using the NLTK tokenizer for better matching + query = " ".join(keywords) stop_words = set(stopwords.words("english")) - word_tokens = word_tokenize(text) + word_tokens = word_tokenize(query) text_trimmed = [ word for word in word_tokens @@ -73,15 +82,7 @@ def remove_stop_words_and_punctuation(text: str) -> list[str]: ] return text_trimmed or word_tokens except Exception: - return text.split(" ") - - -def query_processing( - query: str, -) -> str: - query = " ".join(remove_stop_words_and_punctuation(query)) - query = " ".join(lemmatize_text(query)) - return query + return keywords def combine_retrieval_results( @@ -113,54 +114,100 @@ def doc_index_retrieval( query: SearchQuery, document_index: DocumentIndex, db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, ) -> list[InferenceChunk]: - if query.search_type == SearchType.KEYWORD: - top_chunks = document_index.keyword_retrieval( - query=query.query, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, - ) - else: - db_embedding_model = get_current_db_embedding_model(db_session) - - model = EmbeddingModel( - model_name=db_embedding_model.model_name, - query_prefix=db_embedding_model.query_prefix, - passage_prefix=db_embedding_model.passage_prefix, - normalize=db_embedding_model.normalize, - # The below are globally set, this flow always uses the indexing one - server_host=MODEL_SERVER_HOST, - server_port=MODEL_SERVER_PORT, - ) + """ + This function performs the search to retrieve the chunks, + extracts chunks from the large chunks, persists the scores + from the large chunks to the referenced chunks, + dedupes the chunks, and cleans the chunks. 
+ """ + search_settings = get_current_search_settings(db_session) + + model = EmbeddingModel.from_db_model( + search_settings=search_settings, + # The below are globally set, this flow always uses the indexing one + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ) - query_embedding = model.encode([query.query], text_type=EmbedTextType.QUERY)[0] + query_embedding = model.encode([query.query], text_type=EmbedTextType.QUERY)[0] + + top_chunks = document_index.hybrid_retrieval( + query=query.query, + query_embedding=query_embedding, + final_keywords=query.processed_keywords, + filters=query.filters, + hybrid_alpha=query.hybrid_alpha, + time_decay_multiplier=query.recency_bias_multiplier, + num_to_retrieve=query.num_hits, + offset=query.offset, + ) - if query.search_type == SearchType.SEMANTIC: - top_chunks = document_index.semantic_retrieval( - query=query.query, - query_embedding=query_embedding, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, + retrieval_requests: list[VespaChunkRequest] = [] + normal_chunks: list[InferenceChunkUncleaned] = [] + referenced_chunk_scores: dict[tuple[str, int], float] = {} + for chunk in top_chunks: + if chunk.large_chunk_reference_ids: + retrieval_requests.append( + VespaChunkRequest( + document_id=replace_invalid_doc_id_characters(chunk.document_id), + min_chunk_ind=chunk.large_chunk_reference_ids[0], + max_chunk_ind=chunk.large_chunk_reference_ids[-1], + ) ) + # for each referenced chunk, persist the + # highest score to the referenced chunk + for chunk_id in chunk.large_chunk_reference_ids: + key = (chunk.document_id, chunk_id) + referenced_chunk_scores[key] = max( + referenced_chunk_scores.get(key, 0), chunk.score or 0 + ) + else: + normal_chunks.append(chunk) - elif query.search_type == SearchType.HYBRID: - top_chunks = document_index.hybrid_retrieval( - query=query.query, - query_embedding=query_embedding, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, - offset=query.offset, - hybrid_alpha=hybrid_alpha, - ) + # If there are no large chunks, just return the normal chunks + if not retrieval_requests: + return cleanup_chunks(normal_chunks) + # Retrieve and return the referenced normal chunks from the large chunks + retrieved_inference_chunks = document_index.id_based_retrieval( + chunk_requests=retrieval_requests, + filters=query.filters, + batch_retrieval=True, + ) + + # Apply the scores from the large chunks to the chunks referenced + # by each large chunk + for chunk in retrieved_inference_chunks: + if (chunk.document_id, chunk.chunk_id) in referenced_chunk_scores: + chunk.score = referenced_chunk_scores[(chunk.document_id, chunk.chunk_id)] + referenced_chunk_scores.pop((chunk.document_id, chunk.chunk_id)) else: - raise RuntimeError("Invalid Search Flow") + logger.error( + f"Chunk {chunk.document_id} {chunk.chunk_id} not found in referenced chunk scores" + ) - return top_chunks + # Log any chunks that were not found in the retrieved chunks + for reference in referenced_chunk_scores.keys(): + logger.error(f"Chunk {reference} not found in retrieved chunks") + + unique_chunks: dict[tuple[str, int], InferenceChunkUncleaned] = { + (chunk.document_id, chunk.chunk_id): chunk for chunk in normal_chunks + } + + # persist the highest score of each deduped chunk + for chunk in retrieved_inference_chunks: + key = (chunk.document_id, chunk.chunk_id) + # For duplicates, keep the highest score + if key not in 
unique_chunks or (chunk.score or 0) > ( + unique_chunks[key].score or 0 + ): + unique_chunks[key] = chunk + + # Deduplicate the chunks + deduped_chunks = list(unique_chunks.values()) + deduped_chunks.sort(key=lambda chunk: chunk.score or 0, reverse=True) + return cleanup_chunks(deduped_chunks) def _simplify_text(text: str) -> str: @@ -173,19 +220,16 @@ def retrieve_chunks( query: SearchQuery, document_index: DocumentIndex, db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, # Only applicable to hybrid search - multilingual_expansion_str: str | None = MULTILINGUAL_QUERY_EXPANSION, retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] | None = None, ) -> list[InferenceChunk]: """Returns a list of the best chunks from an initial keyword/semantic/ hybrid search.""" + + multilingual_expansion = get_multilingual_expansion(db_session) # Don't do query expansion on complex queries, rephrasings likely would not work well - if not multilingual_expansion_str or "\n" in query.query or "\r" in query.query: + if not multilingual_expansion or "\n" in query.query or "\r" in query.query: top_chunks = doc_index_retrieval( - query=query, - document_index=document_index, - db_session=db_session, - hybrid_alpha=hybrid_alpha, + query=query, document_index=document_index, db_session=db_session ) else: simplified_queries = set() @@ -193,7 +237,7 @@ def retrieve_chunks( # Currently only uses query expansion on multilingual use cases query_rephrases = multilingual_query_expansion( - query.query, multilingual_expansion_str + query.query, multilingual_expansion ) # Just to be extra sure, add the original query. query_rephrases.append(query.query) @@ -209,15 +253,15 @@ def retrieve_chunks( run_queries.append( ( doc_index_retrieval, - (q_copy, document_index, db_session, hybrid_alpha), + (q_copy, document_index, db_session), ) ) parallel_search_results = run_functions_tuples_in_parallel(run_queries) top_chunks = combine_retrieval_results(parallel_search_results) if not top_chunks: - logger.info( - f"{query.search_type.value.capitalize()} search returned no results " + logger.warning( + f"Hybrid ({query.search_type.value.capitalize()}) search returned no results " f"with filters: {query.filters}" ) return [] @@ -246,34 +290,42 @@ def inference_sections_from_ids( document_index: DocumentIndex, ) -> list[InferenceSection]: # Currently only fetches whole docs - doc_ids_set = set(doc_id for doc_id, chunk_id in doc_identifiers) + doc_ids_set = set(doc_id for doc_id, _ in doc_identifiers) + + chunk_requests: list[VespaChunkRequest] = [ + VespaChunkRequest(document_id=doc_id) for doc_id in doc_ids_set + ] # No need for ACL here because the doc ids were validated beforehand filters = IndexFilters(access_control_list=None) - functions_with_args: list[tuple[Callable, tuple]] = [ - (document_index.id_based_retrieval, (doc_id, None, None, filters)) - for doc_id in doc_ids_set - ] - - parallel_results = run_functions_tuples_in_parallel( - functions_with_args, allow_failures=True + retrieved_chunks = document_index.id_based_retrieval( + chunk_requests=chunk_requests, + filters=filters, ) - # Any failures to retrieve would give a None, drop the Nones and empty lists - inference_chunks_sets = [res for res in parallel_results if res] + cleaned_chunks = cleanup_chunks(retrieved_chunks) + if not cleaned_chunks: + return [] - return [ - inference_section - for inference_section in [ - inference_section_from_chunks( + # Group chunks by document ID + chunks_by_doc_id: dict[str, list[InferenceChunk]] = {} + for chunk 
in cleaned_chunks: + chunks_by_doc_id.setdefault(chunk.document_id, []).append(chunk) + + inference_sections = [ + section + for chunks in chunks_by_doc_id.values() + if chunks + and ( + section := inference_section_from_chunks( # The scores will always be 0 because the fetching by id gives back # no search scores. This is not needed though if the user is explicitly # selecting a document. - center_chunk=chunk_set[0], - chunks=chunk_set, + center_chunk=chunks[0], + chunks=chunks, ) - for chunk_set in inference_chunks_sets - ] - if inference_section is not None + ) ] + + return inference_sections diff --git a/backend/danswer/search/search_nlp_models.py b/backend/danswer/search/search_nlp_models.py deleted file mode 100644 index 761d9aa791f..00000000000 --- a/backend/danswer/search/search_nlp_models.py +++ /dev/null @@ -1,204 +0,0 @@ -import gc -import os -import time -from typing import Optional -from typing import TYPE_CHECKING - -import requests -from transformers import logging as transformer_logging # type:ignore - -from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL -from danswer.search.enums import EmbedTextType -from danswer.utils.logger import setup_logger -from shared_configs.configs import MODEL_SERVER_HOST -from shared_configs.configs import MODEL_SERVER_PORT -from shared_configs.model_server_models import EmbedRequest -from shared_configs.model_server_models import EmbedResponse -from shared_configs.model_server_models import IntentRequest -from shared_configs.model_server_models import IntentResponse -from shared_configs.model_server_models import RerankRequest -from shared_configs.model_server_models import RerankResponse - -transformer_logging.set_verbosity_error() - -os.environ["TOKENIZERS_PARALLELISM"] = "false" -os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" -os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1" - -logger = setup_logger() - - -if TYPE_CHECKING: - from transformers import AutoTokenizer # type: ignore - - -_TOKENIZER: tuple[Optional["AutoTokenizer"], str | None] = (None, None) - - -def clean_model_name(model_str: str) -> str: - return model_str.replace("/", "_").replace("-", "_").replace(".", "_") - - -# NOTE: If None is used, it may not be using the "correct" tokenizer, for cases -# where this is more important, be sure to refresh with the actual model name -def get_default_tokenizer(model_name: str | None = None) -> "AutoTokenizer": - # NOTE: doing a local import here to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - from transformers import AutoTokenizer # type: ignore - - global _TOKENIZER - if _TOKENIZER[0] is None or ( - _TOKENIZER[1] is not None and _TOKENIZER[1] != model_name - ): - if _TOKENIZER[0] is not None: - del _TOKENIZER - gc.collect() - - if model_name is None: - # This could be inaccurate - model_name = DOCUMENT_ENCODER_MODEL - - _TOKENIZER = (AutoTokenizer.from_pretrained(model_name), model_name) - - if hasattr(_TOKENIZER[0], "is_fast") and _TOKENIZER[0].is_fast: - os.environ["TOKENIZERS_PARALLELISM"] = "false" - - return _TOKENIZER[0] - - -def build_model_server_url( - model_server_host: str, - model_server_port: int, -) -> str: - model_server_url = f"{model_server_host}:{model_server_port}" - - # use protocol if provided - if "http" in model_server_url: - return model_server_url - - # otherwise default to http - return f"http://{model_server_url}" - - -class EmbeddingModel: - def __init__( - self, - 
model_name: str, - query_prefix: str | None, - passage_prefix: str | None, - normalize: bool, - server_host: str, # Changes depending on indexing or inference - server_port: int, - # The following are globals are currently not configurable - max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE, - ) -> None: - self.model_name = model_name - self.max_seq_length = max_seq_length - self.query_prefix = query_prefix - self.passage_prefix = passage_prefix - self.normalize = normalize - - model_server_url = build_model_server_url(server_host, server_port) - self.embed_server_endpoint = f"{model_server_url}/encoder/bi-encoder-embed" - - def encode(self, texts: list[str], text_type: EmbedTextType) -> list[list[float]]: - if text_type == EmbedTextType.QUERY and self.query_prefix: - prefixed_texts = [self.query_prefix + text for text in texts] - elif text_type == EmbedTextType.PASSAGE and self.passage_prefix: - prefixed_texts = [self.passage_prefix + text for text in texts] - else: - prefixed_texts = texts - - embed_request = EmbedRequest( - texts=prefixed_texts, - model_name=self.model_name, - max_context_length=self.max_seq_length, - normalize_embeddings=self.normalize, - ) - - response = requests.post(self.embed_server_endpoint, json=embed_request.dict()) - response.raise_for_status() - - return EmbedResponse(**response.json()).embeddings - - -class CrossEncoderEnsembleModel: - def __init__( - self, - model_server_host: str = MODEL_SERVER_HOST, - model_server_port: int = MODEL_SERVER_PORT, - ) -> None: - model_server_url = build_model_server_url(model_server_host, model_server_port) - self.rerank_server_endpoint = model_server_url + "/encoder/cross-encoder-scores" - - def predict(self, query: str, passages: list[str]) -> list[list[float]]: - rerank_request = RerankRequest(query=query, documents=passages) - - response = requests.post( - self.rerank_server_endpoint, json=rerank_request.dict() - ) - response.raise_for_status() - - return RerankResponse(**response.json()).scores - - -class IntentModel: - def __init__( - self, - model_server_host: str = MODEL_SERVER_HOST, - model_server_port: int = MODEL_SERVER_PORT, - ) -> None: - model_server_url = build_model_server_url(model_server_host, model_server_port) - self.intent_server_endpoint = model_server_url + "/custom/intent-model" - - def predict( - self, - query: str, - ) -> list[float]: - intent_request = IntentRequest(query=query) - - response = requests.post( - self.intent_server_endpoint, json=intent_request.dict() - ) - response.raise_for_status() - - return IntentResponse(**response.json()).class_probs - - -def warm_up_encoders( - model_name: str, - normalize: bool, - model_server_host: str = MODEL_SERVER_HOST, - model_server_port: int = MODEL_SERVER_PORT, -) -> None: - warm_up_str = ( - "Danswer is amazing! Check out our easy deployment guide at " - "https://docs.danswer.dev/quickstart" - ) - - get_default_tokenizer(model_name=model_name)(warm_up_str) - - embed_model = EmbeddingModel( - model_name=model_name, - normalize=normalize, - # Not a big deal if prefix is incorrect - query_prefix=None, - passage_prefix=None, - server_host=model_server_host, - server_port=model_server_port, - ) - - # First time downloading the models it may take even longer, but just in case, - # retry the whole server - wait_time = 5 - for attempt in range(20): - try: - embed_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY) - return - except Exception: - logger.exception( - f"Failed to run test embedding, retrying in {wait_time} seconds..." 
- ) - time.sleep(wait_time) - raise Exception("Failed to run test embedding.") diff --git a/backend/danswer/search/search_settings.py b/backend/danswer/search/search_settings.py new file mode 100644 index 00000000000..d502205dfe7 --- /dev/null +++ b/backend/danswer/search/search_settings.py @@ -0,0 +1,30 @@ +from typing import cast + +from danswer.configs.constants import KV_SEARCH_SETTINGS +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.search.models import SavedSearchSettings +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def get_kv_search_settings() -> SavedSearchSettings | None: + """Get all user-configured search settings which affect the search pipeline + Note: KV store is used in this case since there is no need to roll back the value or audit past values + + Note: for now we can't cache this value because if the API server is scaled, the cache could be out of sync + if the value is updated by another process/instance of the API server. If this reads from an in-memory cache like + Redis then it will be fine. Until then, this has some performance implications (though minor) + """ + kv_store = get_dynamic_config_store() + try: + return SavedSearchSettings(**cast(dict, kv_store.load(KV_SEARCH_SETTINGS))) + except ConfigNotFoundError: + return None + except Exception as e: + logger.error(f"Error loading search settings: {e}") + # Wipe it so that on the next server startup it can load the defaults, + # or the user can set it via the API/UI + kv_store.delete(KV_SEARCH_SETTINGS) + return None diff --git a/backend/danswer/search/utils.py b/backend/danswer/search/utils.py index 8b138d2e9b8..21a95320ef5 100644 --- a/backend/danswer/search/utils.py +++ b/backend/danswer/search/utils.py @@ -1,6 +1,7 @@ from collections.abc import Sequence from typing import TypeVar +from danswer.chat.models import SectionRelevancePiece from danswer.db.models import SearchDoc as DBSearchDoc from danswer.search.models import InferenceChunk from danswer.search.models import InferenceSection @@ -18,6 +19,14 @@ SavedSearchDocWithContent, ) +TSection = TypeVar( + "TSection", + InferenceSection, + SearchDoc, + SavedSearchDoc, + SavedSearchDocWithContent, +) + def dedupe_documents(items: list[T]) -> tuple[list[T], list[int]]: seen_ids = set() @@ -37,6 +46,35 @@ def dedupe_documents(items: list[T]) -> tuple[list[T], list[int]]: return deduped_items, dropped_indices +def relevant_sections_to_indices( + relevance_sections: list[SectionRelevancePiece] | None, items: list[TSection] +) -> list[int]: + if not relevance_sections: + return [] + + relevant_set = { + (chunk.document_id, chunk.chunk_id) + for chunk in relevance_sections + if chunk.relevant + } + + return [ + index + for index, item in enumerate(items) + if ( + ( + isinstance(item, InferenceSection) + and (item.center_chunk.document_id, item.center_chunk.chunk_id) + in relevant_set + ) + or ( + not isinstance(item, (InferenceSection)) + and (item.document_id, item.chunk_ind) in relevant_set + ) + ) + ] + + def drop_llm_indices( llm_indices: list[int], search_docs: Sequence[DBSearchDoc | SavedSearchDoc], diff --git a/backend/danswer/secondary_llm_flows/agentic_evaluation.py b/backend/danswer/secondary_llm_flows/agentic_evaluation.py new file mode 100644 index 00000000000..3de9db00be6 --- /dev/null +++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py @@ -0,0 +1,86 @@ +import re + +from danswer.chat.models import 
SectionRelevancePiece +from danswer.llm.interfaces import LLM +from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string +from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT +from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT +from danswer.search.models import InferenceSection +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def _get_agent_eval_messages( + title: str, content: str, query: str, center_metadata: str +) -> list[dict[str, str]]: + messages = [ + { + "role": "system", + "content": AGENTIC_SEARCH_SYSTEM_PROMPT, + }, + { + "role": "user", + "content": AGENTIC_SEARCH_USER_PROMPT.format( + title=title, + content=content, + query=query, + optional_metadata=center_metadata, + ), + }, + ] + return messages + + +def evaluate_inference_section( + document: InferenceSection, query: str, llm: LLM +) -> SectionRelevancePiece: + def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str: + metadata_str = "\n\nMetadata:\n" + for key, value in metadata.items(): + value_str = ", ".join(value) if isinstance(value, list) else value + metadata_str += f"{key} - {value_str}\n" + + # Since there are now multiple sections, add this prefix for clarity + return metadata_str + "\nContent:" + + document_id = document.center_chunk.document_id + semantic_id = document.center_chunk.semantic_identifier + contents = document.combined_content + center_metadata = document.center_chunk.metadata + center_metadata_str = _get_metadata_str(center_metadata) if center_metadata else "" + + messages = _get_agent_eval_messages( + title=semantic_id, + content=contents, + query=query, + center_metadata=center_metadata_str, + ) + filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) + + # Search for the "Useful Analysis" section in the model output + # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon, + # then any text up to "3. 
Final Relevance" + # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines + # If no match is found, the entire model output is used as the analysis + analysis_match = re.search( + r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance", + model_output, + re.DOTALL, + ) + analysis = analysis_match.group(1).strip() if analysis_match else model_output + + # Get the last non-empty line + last_line = next( + (line for line in reversed(model_output.split("\n")) if line.strip()), "" + ) + relevant = last_line.strip().lower().startswith("true") + + return SectionRelevancePiece( + document_id=document_id, + chunk_id=document.center_chunk.chunk_id, + relevant=relevant, + content=analysis, + ) diff --git a/backend/danswer/secondary_llm_flows/chat_session_naming.py b/backend/danswer/secondary_llm_flows/chat_session_naming.py index 9449eaded7a..9ca5f34a62f 100644 --- a/backend/danswer/secondary_llm_flows/chat_session_naming.py +++ b/backend/danswer/secondary_llm_flows/chat_session_naming.py @@ -1,8 +1,8 @@ from danswer.chat.chat_utils import combine_message_chain from danswer.configs.chat_configs import LANGUAGE_CHAT_NAMING_HINT -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF from danswer.db.models import ChatMessage +from danswer.db.search_settings import get_multilingual_expansion from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.llm.utils import message_to_string @@ -22,7 +22,7 @@ def get_renamed_conversation_name( language_hint = ( f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}" - if bool(MULTILINGUAL_QUERY_EXPANSION) + if bool(get_multilingual_expansion()) else "" ) diff --git a/backend/danswer/secondary_llm_flows/chunk_usefulness.py b/backend/danswer/secondary_llm_flows/chunk_usefulness.py index b672a563d0e..b978244028f 100644 --- a/backend/danswer/secondary_llm_flows/chunk_usefulness.py +++ b/backend/danswer/secondary_llm_flows/chunk_usefulness.py @@ -1,5 +1,6 @@ from collections.abc import Callable +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.llm.utils import message_to_string @@ -11,17 +12,33 @@ logger = setup_logger() -def llm_eval_section(query: str, section_content: str, llm: LLM) -> bool: +def llm_eval_section( + query: str, + section_content: str, + llm: LLM, + title: str, + metadata: dict[str, str | list[str]], +) -> bool: + def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str: + metadata_str = "\nMetadata:\n" + for key, value in metadata.items(): + value_str = ", ".join(value) if isinstance(value, list) else value + metadata_str += f"{key} - {value_str}\n" + return metadata_str + def _get_usefulness_messages() -> list[dict[str, str]]: + metadata_str = _get_metadata_str(metadata) if metadata else "" messages = [ { "role": "user", "content": SECTION_FILTER_PROMPT.format( - chunk_text=section_content, user_query=query + title=title.replace("\n", " "), + chunk_text=section_content, + user_query=query, + optional_metadata=metadata_str, ), }, ] - return messages def _extract_usefulness(model_output: str) -> bool: @@ -33,9 +50,6 @@ def _extract_usefulness(model_output: str) -> bool: messages = _get_usefulness_messages() filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - # When running in a batch, it takes as long as the longest 
thread - # And when running a large batch, one may fail and take the whole timeout - # instead cap it to 5 seconds model_output = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) @@ -43,12 +57,25 @@ def _extract_usefulness(model_output: str) -> bool: def llm_batch_eval_sections( - query: str, section_contents: list[str], llm: LLM, use_threads: bool = True + query: str, + section_contents: list[str], + llm: LLM, + titles: list[str], + metadata_list: list[dict[str, str | list[str]]], + use_threads: bool = True, ) -> list[bool]: + if DISABLE_LLM_DOC_RELEVANCE: + raise RuntimeError( + "LLM Doc Relevance is globally disabled, " + "this should have been caught upstream." + ) + if use_threads: functions_with_args: list[tuple[Callable, tuple]] = [ - (llm_eval_section, (query, section_content, llm)) - for section_content in section_contents + (llm_eval_section, (query, section_content, llm, title, metadata)) + for section_content, title, metadata in zip( + section_contents, titles, metadata_list + ) ] logger.debug( @@ -63,6 +90,8 @@ def llm_batch_eval_sections( else: return [ - llm_eval_section(query, section_content, llm) - for section_content in section_contents + llm_eval_section(query, section_content, llm, title, metadata) + for section_content, title, metadata in zip( + section_contents, titles, metadata_list + ) ] diff --git a/backend/danswer/secondary_llm_flows/query_expansion.py b/backend/danswer/secondary_llm_flows/query_expansion.py index e5a2b67e1fa..585af00bdc1 100644 --- a/backend/danswer/secondary_llm_flows/query_expansion.py +++ b/backend/danswer/secondary_llm_flows/query_expansion.py @@ -50,11 +50,10 @@ def _get_rephrase_messages() -> list[dict[str, str]]: def multilingual_query_expansion( query: str, - expansion_languages: str, + expansion_languages: list[str], use_threads: bool = True, ) -> list[str]: - languages = expansion_languages.split(",") - languages = [language.strip() for language in languages] + languages = [language.strip() for language in expansion_languages] if use_threads: functions_with_args: list[tuple[Callable, tuple]] = [ (llm_multilingual_query_expansion, (query, language)) @@ -94,7 +93,7 @@ def history_based_query_rephrase( llm: LLM, size_heuristic: int = 200, punctuation_heuristic: int = 10, - skip_first_rephrase: bool = False, + skip_first_rephrase: bool = True, prompt_template: str = HISTORY_QUERY_REPHRASE, ) -> str: # Globally disabled, just use the exact user query diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py index bbc1ef412b9..2ee428f0090 100644 --- a/backend/danswer/secondary_llm_flows/query_validation.py +++ b/backend/danswer/secondary_llm_flows/query_validation.py @@ -74,7 +74,7 @@ def stream_query_answerability( QueryValidationResponse( reasoning="Query Answerability Evaluation feature is turned off", answerable=True, - ).dict() + ).model_dump() ) return @@ -85,7 +85,7 @@ def stream_query_answerability( QueryValidationResponse( reasoning="Generative AI is turned off - skipping check", answerable=True, - ).dict() + ).model_dump() ) return messages = get_query_validation_messages(user_query) @@ -107,7 +107,7 @@ def stream_query_answerability( remaining = model_output[reason_ind + len(THOUGHT_PAT.upper()) :] if remaining: yield get_json_line( - DanswerAnswerPiece(answer_piece=remaining).dict() + DanswerAnswerPiece(answer_piece=remaining).model_dump() ) continue @@ -116,7 +116,7 @@ def stream_query_answerability( if hold_answerable == 
ANSWERABLE_PAT.upper()[: len(hold_answerable)]: continue yield get_json_line( - DanswerAnswerPiece(answer_piece=hold_answerable).dict() + DanswerAnswerPiece(answer_piece=hold_answerable).model_dump() ) hold_answerable = "" @@ -124,11 +124,13 @@ def stream_query_answerability( answerable = extract_answerability_bool(model_output) yield get_json_line( - QueryValidationResponse(reasoning=reasoning, answerable=answerable).dict() + QueryValidationResponse( + reasoning=reasoning, answerable=answerable + ).model_dump() ) except Exception as e: # exception is logged in the answer_question method, no need to re-log error = StreamingError(error=str(e)) - yield get_json_line(error.dict()) + yield get_json_line(error.model_dump()) logger.exception("Failed to validate Query") return diff --git a/backend/danswer/server/auth_check.py b/backend/danswer/server/auth_check.py index 53ef572daa3..12258eba29b 100644 --- a/backend/danswer/server/auth_check.py +++ b/backend/danswer/server/auth_check.py @@ -5,6 +5,7 @@ from starlette.routing import BaseRoute from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.configs.app_configs import APP_API_PREFIX from danswer.server.danswer_api.ingestion import api_key_dep @@ -93,6 +94,7 @@ def check_router_auth( if ( depends_fn == current_user or depends_fn == current_admin_user + or depends_fn == current_curator_or_admin_user or depends_fn == api_key_dep ): found_auth = True diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index 1b6e6d9852f..cea3ec86575 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -9,9 +9,10 @@ from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.document import get_documents_by_cc_pair from danswer.db.document import get_ingestion_documents -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model from danswer.db.engine import get_session +from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index from danswer.indexing.embedder import DefaultIndexingEmbedder @@ -31,7 +32,7 @@ @router.get("/connector-docs/{cc_pair_id}") def get_docs_by_connector_credential_pair( cc_pair_id: int, - _: str = Depends(api_key_dep), + _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> list[DocMinimalInfo]: db_docs = get_documents_by_cc_pair(cc_pair_id=cc_pair_id, db_session=db_session) @@ -47,7 +48,7 @@ def get_docs_by_connector_credential_pair( @router.get("/ingestion") def get_ingestion_docs( - _: str = Depends(api_key_dep), + _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> list[DocMinimalInfo]: db_docs = get_ingestion_documents(db_session) @@ -64,7 +65,7 @@ def get_ingestion_docs( @router.post("/ingestion") def upsert_ingestion_doc( doc_info: IngestionDocument, - _: str = Depends(api_key_dep), + _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> IngestionResult: doc_info.document.from_ingestion_api = True @@ -89,13 +90,10 @@ def 
upsert_ingestion_doc( primary_index_name=curr_ind_name, secondary_index_name=None ) - db_embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) - index_embedding_model = DefaultIndexingEmbedder( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - query_prefix=db_embedding_model.query_prefix, - passage_prefix=db_embedding_model.passage_prefix, + index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings( + search_settings=search_settings ) indexing_pipeline = build_indexing_pipeline( @@ -105,8 +103,8 @@ def upsert_ingestion_doc( db_session=db_session, ) - new_doc, chunks = indexing_pipeline( - documents=[document], + new_doc, __chunk_count = indexing_pipeline( + document_batch=[document], index_attempt_metadata=IndexAttemptMetadata( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id, @@ -119,19 +117,16 @@ def upsert_ingestion_doc( primary_index_name=curr_ind_name, secondary_index_name=None ) - sec_db_embedding_model = get_secondary_db_embedding_model(db_session) + sec_search_settings = get_secondary_search_settings(db_session) - if sec_db_embedding_model is None: + if sec_search_settings is None: # Should not ever happen raise RuntimeError( - "Secondary index exists but no embedding model configured" + "Secondary index exists but no search settings configured" ) - new_index_embedding_model = DefaultIndexingEmbedder( - model_name=sec_db_embedding_model.model_name, - normalize=sec_db_embedding_model.normalize, - query_prefix=sec_db_embedding_model.query_prefix, - passage_prefix=sec_db_embedding_model.passage_prefix, + new_index_embedding_model = DefaultIndexingEmbedder.from_db_search_settings( + search_settings=sec_search_settings ) sec_ind_pipeline = build_indexing_pipeline( @@ -142,7 +137,7 @@ def upsert_ingestion_doc( ) sec_ind_pipeline( - documents=[document], + document_batch=[document], index_attempt_metadata=IndexAttemptMetadata( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id, diff --git a/backend/danswer/server/danswer_api/models.py b/backend/danswer/server/danswer_api/models.py index 8a534c3e31b..17d6a32c05f 100644 --- a/backend/danswer/server/danswer_api/models.py +++ b/backend/danswer/server/danswer_api/models.py @@ -5,7 +5,7 @@ class IngestionDocument(BaseModel): document: DocumentBase - cc_pair_id: int | None + cc_pair_id: int | None = None class IngestionResult(BaseModel): @@ -16,4 +16,4 @@ class IngestionResult(BaseModel): class DocMinimalInfo(BaseModel): document_id: str semantic_id: str - link: str | None + link: str | None = None diff --git a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py index 861657a43ea..69ae9916348 100644 --- a/backend/danswer/server/documents/cc_pair.py +++ b/backend/danswer/server/documents/cc_pair.py @@ -1,23 +1,34 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from pydantic import BaseModel from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session -from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user -from danswer.background.celery.celery_utils import get_deletion_status +from danswer.background.celery.celery_utils import get_deletion_attempt_snapshot from danswer.db.connector_credential_pair import add_credential_to_connector from danswer.db.connector_credential_pair import 
get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import remove_credential_from_connector +from danswer.db.connector_credential_pair import ( + update_connector_credential_pair_from_id, +) from danswer.db.document import get_document_cnts_for_cc_pairs from danswer.db.engine import get_session -from danswer.db.index_attempt import get_index_attempts_for_cc_pair +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.index_attempt import cancel_indexing_attempts_for_ccpair +from danswer.db.index_attempt import cancel_indexing_attempts_past_model +from danswer.db.index_attempt import get_index_attempts_for_connector from danswer.db.models import User +from danswer.db.models import UserRole from danswer.server.documents.models import CCPairFullInfo from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorCredentialPairMetadata from danswer.server.models import StatusResponse +from danswer.utils.logger import setup_logger + +logger = setup_logger() router = APIRouter(prefix="/manage") @@ -25,27 +36,29 @@ @router.get("/admin/cc-pair/{cc_pair_id}") def get_cc_pair_full_info( cc_pair_id: int, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> CCPairFullInfo: cc_pair = get_connector_credential_pair_from_id( - cc_pair_id=cc_pair_id, - db_session=db_session, + cc_pair_id, db_session, user, get_editable=False ) - if cc_pair is None: + if not cc_pair: raise HTTPException( - status_code=400, - detail=f"Connector with ID {cc_pair_id} not found. Has it been deleted?", + status_code=404, detail="CC Pair not found for current user permissions" ) + editable_cc_pair = get_connector_credential_pair_from_id( + cc_pair_id, db_session, user, get_editable=True + ) + is_editable_for_current_user = editable_cc_pair is not None cc_pair_identifier = ConnectorCredentialPairIdentifier( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id, ) - index_attempts = get_index_attempts_for_cc_pair( - db_session=db_session, - cc_pair_identifier=cc_pair_identifier, + index_attempts = get_index_attempts_for_connector( + db_session, + cc_pair.connector_id, ) document_count_info_list = list( @@ -58,18 +71,82 @@ def get_cc_pair_full_info( document_count_info_list[0][-1] if document_count_info_list else 0 ) - latest_deletion_attempt = get_deletion_status( - connector_id=cc_pair.connector.id, - credential_id=cc_pair.credential.id, - db_session=db_session, - ) - return CCPairFullInfo.from_models( cc_pair_model=cc_pair, index_attempt_models=list(index_attempts), - latest_deletion_attempt=latest_deletion_attempt, + latest_deletion_attempt=get_deletion_attempt_snapshot( + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + db_session=db_session, + ), num_docs_indexed=documents_indexed, + is_editable_for_current_user=is_editable_for_current_user, + ) + + +class CCStatusUpdateRequest(BaseModel): + status: ConnectorCredentialPairStatus + + +@router.put("/admin/cc-pair/{cc_pair_id}/status") +def update_cc_pair_status( + cc_pair_id: int, + status_update_request: CCStatusUpdateRequest, + user: User | None = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> None: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=True, ) + if not cc_pair: + raise 
HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + if status_update_request.status == ConnectorCredentialPairStatus.PAUSED: + cancel_indexing_attempts_for_ccpair(cc_pair_id, db_session) + + # Just for good measure + cancel_indexing_attempts_past_model(db_session) + + update_connector_credential_pair_from_id( + db_session=db_session, + cc_pair_id=cc_pair_id, + status=status_update_request.status, + ) + + +@router.put("/admin/cc-pair/{cc_pair_id}/name") +def update_cc_pair_name( + cc_pair_id: int, + new_name: str, + user: User | None = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> StatusResponse[int]: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=True, + ) + if not cc_pair: + raise HTTPException( + status_code=400, detail="CC Pair not found for current user's permissions" + ) + + try: + cc_pair.name = new_name + db_session.commit() + return StatusResponse( + success=True, message="Name updated successfully", data=cc_pair_id + ) + except IntegrityError: + db_session.rollback() + raise HTTPException(status_code=400, detail="Name must be unique") @router.put("/connector/{connector_id}/credential/{credential_id}") @@ -77,18 +154,27 @@ def associate_credential_to_connector( connector_id: int, credential_id: int, metadata: ConnectorCredentialPairMetadata, - user: User | None = Depends(current_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse[int]: + if user and user.role != UserRole.ADMIN and metadata.is_public: + raise HTTPException( + status_code=400, + detail="Public connections cannot be created by non-admin users", + ) + try: - return add_credential_to_connector( + response = add_credential_to_connector( + db_session=db_session, + user=user, connector_id=connector_id, credential_id=credential_id, cc_pair_name=metadata.name, - is_public=metadata.is_public, - user=user, - db_session=db_session, + is_public=metadata.is_public or True, + groups=metadata.groups, ) + + return response except IntegrityError: raise HTTPException(status_code=400, detail="Name must be unique") diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index ad25523817d..8d6b0ffc773 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -5,6 +5,7 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Query from fastapi import Request from fastapi import Response from fastapi import UploadFile @@ -12,8 +13,9 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user -from danswer.background.celery.celery_utils import get_deletion_status +from danswer.background.celery.celery_utils import get_deletion_attempt_snapshot from danswer.configs.app_configs import ENABLED_CONNECTOR_TYPES from danswer.configs.constants import DocumentSource from danswer.configs.constants import FileOrigin @@ -51,6 +53,9 @@ from danswer.db.connector import fetch_connectors from danswer.db.connector import get_connector_credential_ids from danswer.db.connector import update_connector +from danswer.db.connector_credential_pair import add_credential_to_connector +from 
danswer.db.connector_credential_pair import get_cc_pair_groups_for_ids +from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.credentials import create_credential from danswer.db.credentials import delete_gmail_service_account_credentials @@ -58,14 +63,14 @@ from danswer.db.credentials import fetch_credential_by_id from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.document import get_document_cnts_for_cc_pairs -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session -from danswer.db.index_attempt import cancel_indexing_attempts_for_connector -from danswer.db.index_attempt import cancel_indexing_attempts_past_model from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempts_for_cc_pair +from danswer.db.index_attempt import get_latest_finished_index_attempt_for_cc_pair from danswer.db.index_attempt import get_latest_index_attempts from danswer.db.models import User +from danswer.db.models import UserRole +from danswer.db.search_settings import get_current_search_settings from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store from danswer.server.documents.models import AuthStatus @@ -74,6 +79,8 @@ from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorIndexingStatus from danswer.server.documents.models import ConnectorSnapshot +from danswer.server.documents.models import ConnectorUpdateRequest +from danswer.server.documents.models import CredentialBase from danswer.server.documents.models import CredentialSnapshot from danswer.server.documents.models import FileUploadResponse from danswer.server.documents.models import GDriveCallback @@ -85,6 +92,9 @@ from danswer.server.documents.models import ObjectCreationIdResponse from danswer.server.documents.models import RunConnectorRequest from danswer.server.models import StatusResponse +from danswer.utils.logger import setup_logger + +logger = setup_logger() _GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id" _GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id" @@ -98,7 +108,7 @@ @router.get("/admin/connector/gmail/app-credential") def check_google_app_gmail_credentials_exist( - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: return {"client_id": get_google_app_gmail_cred().web.client_id} @@ -136,7 +146,7 @@ def delete_google_app_gmail_credentials( @router.get("/admin/connector/google-drive/app-credential") def check_google_app_credentials_exist( - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: return {"client_id": get_google_app_cred().web.client_id} @@ -174,7 +184,7 @@ def delete_google_app_credentials( @router.get("/admin/connector/gmail/service-account-key") def check_google_service_gmail_account_key_exist( - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: return {"service_account_email": get_gmail_service_account_key().client_email} @@ -214,7 +224,7 @@ def delete_google_service_gmail_account_key( @router.get("/admin/connector/google-drive/service-account-key") def 
check_google_service_account_key_exist( - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: return {"service_account_email": get_service_account_key().client_email} @@ -255,7 +265,7 @@ def delete_google_service_account_key( @router.put("/admin/connector/google-drive/service-account-credential") def upsert_service_account_credential( service_account_credential_request: GoogleServiceAccountCredentialRequest, - user: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> ObjectCreationIdResponse: """Special API which allows the creation of a credential for a service account. @@ -263,7 +273,8 @@ def upsert_service_account_credential( `Credential` table.""" try: credential_base = build_service_account_creds( - delegated_user_email=service_account_credential_request.google_drive_delegated_user + DocumentSource.GOOGLE_DRIVE, + delegated_user_email=service_account_credential_request.google_drive_delegated_user, ) except ConfigNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -280,7 +291,7 @@ def upsert_service_account_credential( @router.put("/admin/connector/gmail/service-account-credential") def upsert_gmail_service_account_credential( service_account_credential_request: GoogleServiceAccountCredentialRequest, - user: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> ObjectCreationIdResponse: """Special API which allows the creation of a credential for a service account. @@ -288,7 +299,8 @@ def upsert_gmail_service_account_credential( `Credential` table.""" try: credential_base = build_service_account_creds( - delegated_user_email=service_account_credential_request.gmail_delegated_user + DocumentSource.GMAIL, + delegated_user_email=service_account_credential_request.gmail_delegated_user, ) except ConfigNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -340,7 +352,7 @@ def admin_google_drive_auth( @router.post("/admin/connector/file/upload") def upload_files( files: list[UploadFile], - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> FileUploadResponse: for file in files: @@ -367,13 +379,21 @@ def upload_files( @router.get("/admin/connector/indexing-status") def get_connector_indexing_status( secondary_index: bool = False, - _: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), + get_editable: bool = Query( + False, description="If true, return editable document sets" + ), ) -> list[ConnectorIndexingStatus]: indexing_statuses: list[ConnectorIndexingStatus] = [] # TODO: make this one query - cc_pairs = get_connector_credential_pairs(db_session) + cc_pairs = get_connector_credential_pairs( + db_session=db_session, + user=user, + get_editable=get_editable, + ) + cc_pair_identifiers = [ ConnectorCredentialPairIdentifier( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id @@ -382,12 +402,15 @@ def get_connector_indexing_status( ] latest_index_attempts = get_latest_index_attempts( - connector_credential_pair_identifiers=cc_pair_identifiers, secondary_index=secondary_index, db_session=db_session, ) + cc_pair_to_latest_index_attempt = { - (index_attempt.connector_id, index_attempt.credential_id): 
index_attempt + ( + index_attempt.connector_credential_pair.connector_id, + index_attempt.connector_credential_pair.credential_id, + ): index_attempt for index_attempt in latest_index_attempts } @@ -400,6 +423,16 @@ def get_connector_indexing_status( for connector_id, credential_id, cnt in document_count_info } + group_cc_pair_relationships = get_cc_pair_groups_for_ids( + db_session=db_session, + cc_pair_ids=[cc_pair.id for cc_pair in cc_pairs], + ) + group_cc_pair_relationships_dict: dict[int, list[int]] = {} + for relationship in group_cc_pair_relationships: + group_cc_pair_relationships_dict.setdefault(relationship.cc_pair_id, []).append( + relationship.user_group_id + ) + for cc_pair in cc_pairs: # TODO remove this to enable ingestion API if cc_pair.name == "DefaultCCPair": @@ -410,30 +443,44 @@ def get_connector_indexing_status( latest_index_attempt = cc_pair_to_latest_index_attempt.get( (connector.id, credential.id) ) + + latest_finished_attempt = get_latest_finished_index_attempt_for_cc_pair( + connector_credential_pair_id=cc_pair.id, + secondary_index=secondary_index, + db_session=db_session, + ) + indexing_statuses.append( ConnectorIndexingStatus( cc_pair_id=cc_pair.id, name=cc_pair.name, + cc_pair_status=cc_pair.status, connector=ConnectorSnapshot.from_connector_db_model(connector), credential=CredentialSnapshot.from_credential_db_model(credential), public_doc=cc_pair.is_public, owner=credential.user.email if credential.user else "", - last_status=latest_index_attempt.status - if latest_index_attempt - else None, + groups=group_cc_pair_relationships_dict.get(cc_pair.id, []), + last_finished_status=( + latest_finished_attempt.status if latest_finished_attempt else None + ), + last_status=( + latest_index_attempt.status if latest_index_attempt else None + ), last_success=cc_pair.last_successful_index_time, docs_indexed=cc_pair_to_document_cnt.get( (connector.id, credential.id), 0 ), - error_msg=latest_index_attempt.error_msg - if latest_index_attempt - else None, - latest_index_attempt=IndexAttemptSnapshot.from_index_attempt_db_model( - latest_index_attempt - ) - if latest_index_attempt - else None, - deletion_attempt=get_deletion_status( + error_msg=( + latest_index_attempt.error_msg if latest_index_attempt else None + ), + latest_index_attempt=( + IndexAttemptSnapshot.from_index_attempt_db_model( + latest_index_attempt + ) + if latest_index_attempt + else None + ), + deletion_attempt=get_deletion_attempt_snapshot( connector_id=connector.id, credential_id=credential.id, db_session=db_session, @@ -467,15 +514,91 @@ def _validate_connector_allowed(source: DocumentSource) -> None: ) +def _check_connector_permissions( + connector_data: ConnectorUpdateRequest, user: User | None +) -> ConnectorBase: + """ + This is not a proper permission check, but this should prevent curators creating bad situations + until a long-term solution is implemented (Replacing CC pairs/Connectors with Connections) + """ + if user and user.role != UserRole.ADMIN: + if connector_data.is_public: + raise HTTPException( + status_code=400, + detail="Public connectors can only be created by admins", + ) + if not connector_data.groups: + raise HTTPException( + status_code=400, + detail="Connectors created by curators must have groups", + ) + return ConnectorBase( + name=connector_data.name, + source=connector_data.source, + input_type=connector_data.input_type, + connector_specific_config=connector_data.connector_specific_config, + refresh_freq=connector_data.refresh_freq, + prune_freq=connector_data.prune_freq, + 
indexing_start=connector_data.indexing_start, + ) + + @router.post("/admin/connector") def create_connector_from_model( - connector_data: ConnectorBase, - _: User = Depends(current_admin_user), + connector_data: ConnectorUpdateRequest, + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> ObjectCreationIdResponse: try: _validate_connector_allowed(connector_data.source) - return create_connector(connector_data, db_session) + connector_base = _check_connector_permissions(connector_data, user) + return create_connector( + db_session=db_session, + connector_data=connector_base, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@router.post("/admin/connector-with-mock-credential") +def create_connector_with_mock_credential( + connector_data: ConnectorUpdateRequest, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> StatusResponse: + if user and user.role != UserRole.ADMIN: + if connector_data.is_public: + raise HTTPException( + status_code=401, + detail="User does not have permission to create public credentials", + ) + if not connector_data.groups: + raise HTTPException( + status_code=401, + detail="Curators must specify 1+ groups", + ) + try: + _validate_connector_allowed(connector_data.source) + connector_response = create_connector( + db_session=db_session, connector_data=connector_data + ) + mock_credential = CredentialBase( + credential_json={}, admin_public=True, source=connector_data.source + ) + credential = create_credential( + mock_credential, user=user, db_session=db_session + ) + response = add_credential_to_connector( + db_session=db_session, + user=user, + connector_id=cast(int, connector_response.id), # will always be an int + credential_id=credential.id, + is_public=connector_data.is_public or False, + cc_pair_name=connector_data.name, + groups=connector_data.groups, + ) + return response + except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -483,27 +606,22 @@ def create_connector_from_model( @router.patch("/admin/connector/{connector_id}") def update_connector_from_model( connector_id: int, - connector_data: ConnectorBase, - _: User = Depends(current_admin_user), + connector_data: ConnectorUpdateRequest, + user: User = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ConnectorSnapshot | StatusResponse[int]: try: _validate_connector_allowed(connector_data.source) + connector_base = _check_connector_permissions(connector_data, user) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) - updated_connector = update_connector(connector_id, connector_data, db_session) + updated_connector = update_connector(connector_id, connector_base, db_session) if updated_connector is None: raise HTTPException( status_code=404, detail=f"Connector {connector_id} does not exist" ) - if updated_connector.disabled: - cancel_indexing_attempts_for_connector(connector_id, db_session) - - # Just for good measure - cancel_indexing_attempts_past_model(db_session) - return ConnectorSnapshot( id=updated_connector.id, name=updated_connector.name, @@ -515,9 +633,9 @@ def update_connector_from_model( credential_ids=[ association.credential.id for association in updated_connector.credentials ], + indexing_start=updated_connector.indexing_start, time_created=updated_connector.time_created, time_updated=updated_connector.time_updated, - disabled=updated_connector.disabled, ) @@ -529,7 +647,10 @@ 
def delete_connector_by_id( ) -> StatusResponse[int]: try: with db_session.begin(): - return delete_connector(db_session=db_session, connector_id=connector_id) + return delete_connector( + db_session=db_session, + connector_id=connector_id, + ) except AssertionError: raise HTTPException(status_code=400, detail="Connector is not deletable") @@ -537,11 +658,12 @@ def delete_connector_by_id( @router.post("/admin/connector/run-once") def connector_run_once( run_info: RunConnectorRequest, - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse[list[int]]: connector_id = run_info.connector_id specified_credential_ids = run_info.credential_ids + try: possible_credential_ids = get_connector_credential_ids( run_info.connector_id, db_session @@ -583,18 +705,23 @@ def connector_run_once( ) ] - embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) + + connector_credential_pairs = [ + get_connector_credential_pair(run_info.connector_id, credential_id, db_session) + for credential_id in credential_ids + if credential_id not in skipped_credentials + ] index_attempt_ids = [ create_index_attempt( - connector_id=run_info.connector_id, - credential_id=credential_id, - embedding_model_id=embedding_model.id, + connector_credential_pair_id=connector_credential_pair.id, + search_settings_id=search_settings.id, from_beginning=run_info.from_beginning, db_session=db_session, ) - for credential_id in credential_ids - if credential_id not in skipped_credentials + for connector_credential_pair in connector_credential_pairs + if connector_credential_pair is not None ] if not index_attempt_ids: @@ -724,6 +851,7 @@ def get_connector_by_id( id=connector.id, name=connector.name, source=connector.source, + indexing_start=connector.indexing_start, input_type=connector.input_type, connector_specific_config=connector.connector_specific_config, refresh_freq=connector.refresh_freq, @@ -733,7 +861,6 @@ def get_connector_by_id( ], time_created=connector.time_created, time_updated=connector.time_updated, - disabled=connector.disabled, ) diff --git a/backend/danswer/server/documents/credential.py b/backend/danswer/server/documents/credential.py index a5e9098046a..ba30b65f2f9 100644 --- a/backend/danswer/server/documents/credential.py +++ b/backend/danswer/server/documents/credential.py @@ -1,36 +1,90 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Query from sqlalchemy.orm import Session -from danswer.auth.schemas import UserRole from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user +from danswer.auth.users import validate_curator_request +from danswer.db.credentials import alter_credential from danswer.db.credentials import create_credential +from danswer.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE from danswer.db.credentials import delete_credential from danswer.db.credentials import fetch_credential_by_id from danswer.db.credentials import fetch_credentials +from danswer.db.credentials import fetch_credentials_by_source +from danswer.db.credentials import swap_credentials_connector from danswer.db.credentials import update_credential from danswer.db.engine import get_session +from danswer.db.models import DocumentSource from danswer.db.models import User +from danswer.db.models import 
UserRole from danswer.server.documents.models import CredentialBase +from danswer.server.documents.models import CredentialDataUpdateRequest from danswer.server.documents.models import CredentialSnapshot +from danswer.server.documents.models import CredentialSwapRequest from danswer.server.documents.models import ObjectCreationIdResponse from danswer.server.models import StatusResponse +from danswer.utils.logger import setup_logger + +logger = setup_logger() router = APIRouter(prefix="/manage") +def _ignore_credential_permissions(source: DocumentSource) -> bool: + return source in CREDENTIAL_PERMISSIONS_TO_IGNORE + + """Admin-only endpoints""" @router.get("/admin/credential") def list_credentials_admin( - user: User = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> list[CredentialSnapshot]: """Lists all public credentials""" + credentials = fetch_credentials( + db_session=db_session, + user=user, + get_editable=False, + ) + return [ + CredentialSnapshot.from_credential_db_model(credential) + for credential in credentials + ] + + +@router.get("/admin/similar-credentials/{source_type}") +def get_cc_source_full_info( + source_type: DocumentSource, + user: User | None = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), + get_editable: bool = Query( + False, description="If true, return editable credentials" + ), +) -> list[CredentialSnapshot]: + credentials = fetch_credentials_by_source( + db_session=db_session, + user=user, + document_source=source_type, + get_editable=get_editable, + ) + return [ + CredentialSnapshot.from_credential_db_model(credential) + for credential in credentials + ] + + +@router.get("/credentials/{id}") +def list_credentials_by_id( + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[CredentialSnapshot]: credentials = fetch_credentials(db_session=db_session, user=user) return [ CredentialSnapshot.from_credential_db_model(credential) @@ -51,6 +105,49 @@ def delete_credential_by_id_admin( ) +@router.put("/admin/credentials/swap") +def swap_credentials_for_connector( + credential_swap_req: CredentialSwapRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> StatusResponse: + connector_credential_pair = swap_credentials_connector( + new_credential_id=credential_swap_req.new_credential_id, + connector_id=credential_swap_req.connector_id, + db_session=db_session, + user=user, + ) + + return StatusResponse( + success=True, + message="Credential swapped successfully", + data=connector_credential_pair.id, + ) + + +@router.post("/credential") +def create_credential_from_model( + credential_info: CredentialBase, + user: User | None = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> ObjectCreationIdResponse: + if ( + user + and user.role != UserRole.ADMIN + and not _ignore_credential_permissions(credential_info.source) + ): + validate_curator_request( + groups=credential_info.groups, + is_public=credential_info.curator_public, + ) + + credential = create_credential(credential_info, user, db_session) + return ObjectCreationIdResponse( + id=credential.id, + credential=CredentialSnapshot.from_credential_db_model(credential), + ) + + """Endpoints for all""" @@ -66,29 +163,31 @@ def list_credentials( ] -@router.post("/credential") -def create_credential_from_model( - credential_info: CredentialBase, - user: User | None 
= Depends(current_user), +@router.get("/credential/{credential_id}") +def get_credential_by_id( + credential_id: int, + user: User = Depends(current_user), db_session: Session = Depends(get_session), -) -> ObjectCreationIdResponse: - if user and user.role != UserRole.ADMIN and credential_info.admin_public: +) -> CredentialSnapshot | StatusResponse[int]: + credential = fetch_credential_by_id(credential_id, user, db_session) + if credential is None: raise HTTPException( - status_code=400, - detail="Non-admin cannot create admin credential", + status_code=401, + detail=f"Credential {credential_id} does not exist or does not belong to user", ) - credential = create_credential(credential_info, user, db_session) - return ObjectCreationIdResponse(id=credential.id) + return CredentialSnapshot.from_credential_db_model(credential) -@router.get("/credential/{credential_id}") -def get_credential_by_id( +@router.put("/admin/credentials/{credential_id}") +def update_credential_data( credential_id: int, + credential_update: CredentialDataUpdateRequest, user: User = Depends(current_user), db_session: Session = Depends(get_session), -) -> CredentialSnapshot | StatusResponse[int]: - credential = fetch_credential_by_id(credential_id, user, db_session) +) -> CredentialBase: + credential = alter_credential(credential_id, credential_update, user, db_session) + if credential is None: raise HTTPException( status_code=401, @@ -115,12 +214,15 @@ def update_credential_from_model( ) return CredentialSnapshot( + source=updated_credential.source, id=updated_credential.id, credential_json=updated_credential.credential_json, user_id=updated_credential.user_id, + name=updated_credential.name, admin_public=updated_credential.admin_public, time_created=updated_credential.time_created, time_updated=updated_credential.time_updated, + curator_public=updated_credential.curator_public, ) @@ -130,7 +232,25 @@ def delete_credential_by_id( user: User = Depends(current_user), db_session: Session = Depends(get_session), ) -> StatusResponse: - delete_credential(credential_id, user, db_session) + delete_credential( + credential_id, + user, + db_session, + ) + + return StatusResponse( + success=True, message="Credential deleted successfully", data=credential_id + ) + + +@router.delete("/credential/force/{credential_id}") +def force_delete_credential_by_id( + credential_id: int, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> StatusResponse: + delete_credential(credential_id, user, db_session, True) + return StatusResponse( success=True, message="Credential deleted successfully", data=credential_id ) diff --git a/backend/danswer/server/documents/document.py b/backend/danswer/server/documents/document.py index 3b0adea246c..bf8cdbcef44 100644 --- a/backend/danswer/server/documents/document.py +++ b/backend/danswer/server/documents/document.py @@ -5,12 +5,14 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_user -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings from danswer.document_index.factory import get_default_document_index -from danswer.llm.utils import get_default_llm_token_encode +from danswer.document_index.interfaces import VespaChunkRequest +from danswer.natural_language_processing.utils import get_tokenizer from danswer.prompts.prompt_utils import build_doc_context_str +from danswer.search.models 
import IndexFilters from danswer.search.preprocessing.access_filters import build_access_filters_for_user from danswer.server.documents.models import ChunkInfo from danswer.server.documents.models import DocumentInfo @@ -27,18 +29,16 @@ def get_document_info( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> DocumentInfo: - embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None + primary_index_name=search_settings.index_name, secondary_index_name=None ) user_acl_filters = build_access_filters_for_user(user, db_session) inference_chunks = document_index.id_based_retrieval( - document_id=document_id, - min_chunk_ind=None, - max_chunk_ind=None, - user_access_control_list=user_acl_filters, + chunk_requests=[VespaChunkRequest(document_id=document_id)], + filters=IndexFilters(access_control_list=user_acl_filters), ) if not inference_chunks: @@ -50,7 +50,10 @@ def get_document_info( # get actual document context used for LLM first_chunk = inference_chunks[0] - tokenizer_encode = get_default_llm_token_encode() + tokenizer_encode = get_tokenizer( + provider_type=search_settings.provider_type, + model_name=search_settings.model_name, + ).encode full_context_str = build_doc_context_str( semantic_identifier=first_chunk.semantic_identifier, source_type=first_chunk.source_type, @@ -73,18 +76,22 @@ def get_chunk_info( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> ChunkInfo: - embedding_model = get_current_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None + primary_index_name=search_settings.index_name, secondary_index_name=None ) user_acl_filters = build_access_filters_for_user(user, db_session) - inference_chunks = document_index.id_based_retrieval( + chunk_request = VespaChunkRequest( document_id=document_id, min_chunk_ind=chunk_id, max_chunk_ind=chunk_id, - user_access_control_list=user_acl_filters, + ) + inference_chunks = document_index.id_based_retrieval( + chunk_requests=[chunk_request], + filters=IndexFilters(access_control_list=user_acl_filters), + batch_retrieval=True, ) if not inference_chunks: @@ -92,7 +99,10 @@ def get_chunk_info( chunk_content = inference_chunks[0].content - tokenizer_encode = get_default_llm_token_encode() + tokenizer_encode = get_tokenizer( + provider_type=search_settings.provider_type, + model_name=search_settings.model_name, + ).encode return ChunkInfo( content=chunk_content, num_tokens=len(tokenizer_encode(chunk_content)) diff --git a/backend/danswer/server/documents/indexing.py b/backend/danswer/server/documents/indexing.py new file mode 100644 index 00000000000..4d5081c3fe7 --- /dev/null +++ b/backend/danswer/server/documents/indexing.py @@ -0,0 +1,23 @@ +from fastapi import APIRouter +from fastapi import Depends +from sqlalchemy.orm import Session + +from danswer.auth.users import current_admin_user +from danswer.db.engine import get_session +from danswer.db.index_attempt import ( + get_index_attempt_errors, +) +from danswer.db.models import User +from danswer.server.documents.models import IndexAttemptError + +router = APIRouter(prefix="/manage") + + +@router.get("/admin/indexing-errors/{index_attempt_id}") +def get_indexing_errors( + 
index_attempt_id: int, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[IndexAttemptError]: + indexing_errors = get_index_attempt_errors(index_attempt_id, db_session) + return [IndexAttemptError.from_db_model(e) for e in indexing_errors] diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index e02132e741a..ba011afc196 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -3,14 +3,18 @@ from uuid import UUID from pydantic import BaseModel +from pydantic import Field from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX from danswer.configs.constants import DocumentSource +from danswer.connectors.models import DocumentErrorSummary from danswer.connectors.models import InputType +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import Connector from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential from danswer.db.models import IndexAttempt +from danswer.db.models import IndexAttemptError as DbIndexAttemptError from danswer.db.models import IndexingStatus from danswer.db.models import TaskStatus from danswer.server.utils import mask_credential_dict @@ -26,36 +30,6 @@ class ChunkInfo(BaseModel): num_tokens: int -class IndexAttemptSnapshot(BaseModel): - id: int - status: IndexingStatus | None - new_docs_indexed: int # only includes completely new docs - total_docs_indexed: int # includes docs that are updated - docs_removed_from_index: int - error_msg: str | None - full_exception_trace: str | None - time_started: str | None - time_updated: str - - @classmethod - def from_index_attempt_db_model( - cls, index_attempt: IndexAttempt - ) -> "IndexAttemptSnapshot": - return IndexAttemptSnapshot( - id=index_attempt.id, - status=index_attempt.status, - new_docs_indexed=index_attempt.new_docs_indexed or 0, - total_docs_indexed=index_attempt.total_docs_indexed or 0, - docs_removed_from_index=index_attempt.docs_removed_from_index or 0, - error_msg=index_attempt.error_msg, - full_exception_trace=index_attempt.full_exception_trace, - time_started=index_attempt.time_started.isoformat() - if index_attempt.time_started - else None, - time_updated=index_attempt.time_updated.isoformat(), - ) - - class DeletionAttemptSnapshot(BaseModel): connector_id: int credential_id: int @@ -67,9 +41,15 @@ class ConnectorBase(BaseModel): source: DocumentSource input_type: InputType connector_specific_config: dict[str, Any] - refresh_freq: int | None # In seconds, None for one time index with no refresh - prune_freq: int | None - disabled: bool + # In seconds, None for one time index with no refresh + refresh_freq: int | None = None + prune_freq: int | None = None + indexing_start: datetime | None = None + + +class ConnectorUpdateRequest(ConnectorBase): + is_public: bool | None = None + groups: list[int] = Field(default_factory=list) class ConnectorSnapshot(ConnectorBase): @@ -77,6 +57,7 @@ class ConnectorSnapshot(ConnectorBase): credential_ids: list[int] time_created: datetime time_updated: datetime + source: DocumentSource @classmethod def from_connector_db_model(cls, connector: Connector) -> "ConnectorSnapshot": @@ -91,16 +72,30 @@ def from_connector_db_model(cls, connector: Connector) -> "ConnectorSnapshot": credential_ids=[ association.credential.id for association in connector.credentials ], + indexing_start=connector.indexing_start, time_created=connector.time_created, 
time_updated=connector.time_updated, - disabled=connector.disabled, ) +class CredentialSwapRequest(BaseModel): + new_credential_id: int + connector_id: int + + +class CredentialDataUpdateRequest(BaseModel): + name: str + credential_json: dict[str, Any] + + class CredentialBase(BaseModel): credential_json: dict[str, Any] # if `true`, then all Admins will have access to the credential admin_public: bool + source: DocumentSource + name: str | None = None + curator_public: bool = False + groups: list[int] = Field(default_factory=list) class CredentialSnapshot(CredentialBase): @@ -108,29 +103,101 @@ class CredentialSnapshot(CredentialBase): user_id: UUID | None time_created: datetime time_updated: datetime + name: str | None + source: DocumentSource + credential_json: dict[str, Any] + admin_public: bool + curator_public: bool @classmethod def from_credential_db_model(cls, credential: Credential) -> "CredentialSnapshot": return CredentialSnapshot( id=credential.id, - credential_json=mask_credential_dict(credential.credential_json) - if MASK_CREDENTIAL_PREFIX - else credential.credential_json, + credential_json=( + mask_credential_dict(credential.credential_json) + if MASK_CREDENTIAL_PREFIX and credential.credential_json + else credential.credential_json + ), user_id=credential.user_id, admin_public=credential.admin_public, time_created=credential.time_created, time_updated=credential.time_updated, + source=credential.source or DocumentSource.NOT_APPLICABLE, + name=credential.name, + curator_public=credential.curator_public, + ) + + +class IndexAttemptSnapshot(BaseModel): + id: int + status: IndexingStatus | None + new_docs_indexed: int # only includes completely new docs + total_docs_indexed: int # includes docs that are updated + docs_removed_from_index: int + error_msg: str | None + error_count: int + full_exception_trace: str | None + time_started: str | None + time_updated: str + + @classmethod + def from_index_attempt_db_model( + cls, index_attempt: IndexAttempt + ) -> "IndexAttemptSnapshot": + return IndexAttemptSnapshot( + id=index_attempt.id, + status=index_attempt.status, + new_docs_indexed=index_attempt.new_docs_indexed or 0, + total_docs_indexed=index_attempt.total_docs_indexed or 0, + docs_removed_from_index=index_attempt.docs_removed_from_index or 0, + error_msg=index_attempt.error_msg, + error_count=len(index_attempt.error_rows), + full_exception_trace=index_attempt.full_exception_trace, + time_started=( + index_attempt.time_started.isoformat() + if index_attempt.time_started + else None + ), + time_updated=index_attempt.time_updated.isoformat(), + ) + + +class IndexAttemptError(BaseModel): + id: int + index_attempt_id: int | None + batch_number: int | None + doc_summaries: list[DocumentErrorSummary] + error_msg: str | None + traceback: str | None + time_created: str + + @classmethod + def from_db_model(cls, error: DbIndexAttemptError) -> "IndexAttemptError": + doc_summaries = [ + DocumentErrorSummary.from_dict(summary) for summary in error.doc_summaries + ] + return IndexAttemptError( + id=error.id, + index_attempt_id=error.index_attempt_id, + batch_number=error.batch, + doc_summaries=doc_summaries, + error_msg=error.error_msg, + traceback=error.traceback, + time_created=error.time_created.isoformat(), ) class CCPairFullInfo(BaseModel): id: int name: str + status: ConnectorCredentialPairStatus num_docs_indexed: int connector: ConnectorSnapshot credential: CredentialSnapshot index_attempts: list[IndexAttemptSnapshot] latest_deletion_attempt: DeletionAttemptSnapshot | None + 
is_public: bool + is_editable_for_current_user: bool @classmethod def from_models( @@ -139,10 +206,12 @@ def from_models( index_attempt_models: list[IndexAttempt], latest_deletion_attempt: DeletionAttemptSnapshot | None, num_docs_indexed: int, # not ideal, but this must be computed separately + is_editable_for_current_user: bool, ) -> "CCPairFullInfo": return cls( id=cc_pair_model.id, name=cc_pair_model.name, + status=cc_pair_model.status, num_docs_indexed=num_docs_indexed, connector=ConnectorSnapshot.from_connector_db_model( cc_pair_model.connector @@ -155,6 +224,8 @@ def from_models( for index_attempt_model in index_attempt_models ], latest_deletion_attempt=latest_deletion_attempt, + is_public=cc_pair_model.is_public, + is_editable_for_current_user=is_editable_for_current_user, ) @@ -163,10 +234,13 @@ class ConnectorIndexingStatus(BaseModel): cc_pair_id: int name: str | None + cc_pair_status: ConnectorCredentialPairStatus connector: ConnectorSnapshot credential: CredentialSnapshot owner: str + groups: list[int] public_doc: bool + last_finished_status: IndexingStatus | None last_status: IndexingStatus | None last_success: datetime | None docs_indexed: int @@ -182,20 +256,21 @@ class ConnectorCredentialPairIdentifier(BaseModel): class ConnectorCredentialPairMetadata(BaseModel): - name: str | None - is_public: bool + name: str | None = None + is_public: bool | None = None + groups: list[int] = Field(default_factory=list) class ConnectorCredentialPairDescriptor(BaseModel): id: int - name: str | None + name: str | None = None connector: ConnectorSnapshot credential: CredentialSnapshot class RunConnectorRequest(BaseModel): connector_id: int - credential_ids: list[int] | None + credential_ids: list[int] | None = None from_beginning: bool = False @@ -242,6 +317,7 @@ class FileUploadResponse(BaseModel): class ObjectCreationIdResponse(BaseModel): id: int | str + credential: CredentialSnapshot | None = None class AuthStatus(BaseModel): diff --git a/backend/danswer/server/features/document_set/api.py b/backend/danswer/server/features/document_set/api.py index f939329bf9a..d1eff082891 100644 --- a/backend/danswer/server/features/document_set/api.py +++ b/backend/danswer/server/features/document_set/api.py @@ -1,21 +1,20 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Query from sqlalchemy.orm import Session -from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user +from danswer.auth.users import validate_curator_request from danswer.db.document_set import check_document_sets_are_public -from danswer.db.document_set import fetch_all_document_sets -from danswer.db.document_set import fetch_user_document_sets +from danswer.db.document_set import fetch_all_document_sets_for_user from danswer.db.document_set import insert_document_set from danswer.db.document_set import mark_document_set_as_to_be_deleted from danswer.db.document_set import update_document_set from danswer.db.engine import get_session from danswer.db.models import User -from danswer.server.documents.models import ConnectorCredentialPairDescriptor -from danswer.server.documents.models import ConnectorSnapshot -from danswer.server.documents.models import CredentialSnapshot +from danswer.db.models import UserRole from danswer.server.features.document_set.models import CheckDocSetPublicRequest from danswer.server.features.document_set.models import CheckDocSetPublicResponse from 
danswer.server.features.document_set.models import DocumentSet @@ -29,9 +28,14 @@ @router.post("/admin/document-set") def create_document_set( document_set_creation_request: DocumentSetCreationRequest, - user: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> int: + if user and user.role != UserRole.ADMIN: + validate_curator_request( + groups=document_set_creation_request.groups, + is_public=document_set_creation_request.is_public, + ) try: document_set_db_model, _ = insert_document_set( document_set_creation_request=document_set_creation_request, @@ -46,13 +50,19 @@ def create_document_set( @router.patch("/admin/document-set") def patch_document_set( document_set_update_request: DocumentSetUpdateRequest, - _: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: + if user and user.role != UserRole.ADMIN: + validate_curator_request( + groups=document_set_update_request.groups, + is_public=document_set_update_request.is_public, + ) try: update_document_set( document_set_update_request=document_set_update_request, db_session=db_session, + user=user, ) except Exception as e: raise HTTPException(status_code=400, detail=str(e)) @@ -61,64 +71,35 @@ def patch_document_set( @router.delete("/admin/document-set/{document_set_id}") def delete_document_set( document_set_id: int, - _: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: try: mark_document_set_as_to_be_deleted( - document_set_id=document_set_id, db_session=db_session + db_session=db_session, + document_set_id=document_set_id, + user=user, ) except Exception as e: raise HTTPException(status_code=400, detail=str(e)) -@router.get("/admin/document-set") -def list_document_sets_admin( - _: User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> list[DocumentSet]: - return [ - DocumentSet.from_model(ds) - for ds in fetch_all_document_sets(db_session=db_session) - ] - - """Endpoints for non-admins""" @router.get("/document-set") -def list_document_sets( +def list_document_sets_for_user( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), + get_editable: bool = Query( + False, description="If true, return editable document sets" + ), ) -> list[DocumentSet]: - document_set_info = fetch_user_document_sets( - user_id=user.id if user else None, db_session=db_session - ) return [ - DocumentSet( - id=document_set_db_model.id, - name=document_set_db_model.name, - description=document_set_db_model.description, - contains_non_public=any([not cc_pair.is_public for cc_pair in cc_pairs]), - cc_pair_descriptors=[ - ConnectorCredentialPairDescriptor( - id=cc_pair.id, - name=cc_pair.name, - connector=ConnectorSnapshot.from_connector_db_model( - cc_pair.connector - ), - credential=CredentialSnapshot.from_credential_db_model( - cc_pair.credential - ), - ) - for cc_pair in cc_pairs - ], - is_up_to_date=document_set_db_model.is_up_to_date, - is_public=document_set_db_model.is_public, - users=[user.id for user in document_set_db_model.users], - groups=[group.id for group in document_set_db_model.groups], + DocumentSet.from_model(ds) + for ds in fetch_all_document_sets_for_user( + db_session=db_session, user=user, get_editable=get_editable ) - for document_set_db_model, cc_pairs in document_set_info ] diff --git 
a/backend/danswer/server/features/document_set/models.py b/backend/danswer/server/features/document_set/models.py index 05ada42c89a..55f3376545f 100644 --- a/backend/danswer/server/features/document_set/models.py +++ b/backend/danswer/server/features/document_set/models.py @@ -1,6 +1,7 @@ from uuid import UUID from pydantic import BaseModel +from pydantic import Field from danswer.db.models import DocumentSet as DocumentSetDBModel from danswer.server.documents.models import ConnectorCredentialPairDescriptor @@ -14,8 +15,8 @@ class DocumentSetCreationRequest(BaseModel): cc_pair_ids: list[int] is_public: bool # For Private Document Sets, who should be able to access these - users: list[UUID] | None = None - groups: list[int] | None = None + users: list[UUID] = Field(default_factory=list) + groups: list[int] = Field(default_factory=list) class DocumentSetUpdateRequest(BaseModel): diff --git a/backend/danswer/server/features/folder/models.py b/backend/danswer/server/features/folder/models.py index d665fd91985..d7b161414a3 100644 --- a/backend/danswer/server/features/folder/models.py +++ b/backend/danswer/server/features/folder/models.py @@ -19,7 +19,7 @@ class FolderCreationRequest(BaseModel): class FolderUpdateRequest(BaseModel): - folder_name: str | None + folder_name: str | None = None class FolderChatSessionRequest(BaseModel): diff --git a/backend/danswer/server/features/input_prompt/__init__.py b/backend/danswer/server/features/input_prompt/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/server/features/input_prompt/api.py b/backend/danswer/server/features/input_prompt/api.py new file mode 100644 index 00000000000..58eecd0093d --- /dev/null +++ b/backend/danswer/server/features/input_prompt/api.py @@ -0,0 +1,134 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user +from danswer.db.engine import get_session +from danswer.db.input_prompt import fetch_input_prompt_by_id +from danswer.db.input_prompt import fetch_input_prompts_by_user +from danswer.db.input_prompt import fetch_public_input_prompts +from danswer.db.input_prompt import insert_input_prompt +from danswer.db.input_prompt import remove_input_prompt +from danswer.db.input_prompt import remove_public_input_prompt +from danswer.db.input_prompt import update_input_prompt +from danswer.db.models import User +from danswer.server.features.input_prompt.models import CreateInputPromptRequest +from danswer.server.features.input_prompt.models import InputPromptSnapshot +from danswer.server.features.input_prompt.models import UpdateInputPromptRequest +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +basic_router = APIRouter(prefix="/input_prompt") +admin_router = APIRouter(prefix="/admin/input_prompt") + + +@basic_router.get("") +def list_input_prompts( + user: User | None = Depends(current_user), + include_public: bool = False, + db_session: Session = Depends(get_session), +) -> list[InputPromptSnapshot]: + user_prompts = fetch_input_prompts_by_user( + user_id=user.id if user is not None else None, + db_session=db_session, + include_public=include_public, + ) + return [InputPromptSnapshot.from_model(prompt) for prompt in user_prompts] + + +@basic_router.get("/{input_prompt_id}") +def get_input_prompt( + input_prompt_id: int, + user: User | None = Depends(current_user), + db_session: Session = 
Depends(get_session), +) -> InputPromptSnapshot: + input_prompt = fetch_input_prompt_by_id( + id=input_prompt_id, + user_id=user.id if user is not None else None, + db_session=db_session, + ) + return InputPromptSnapshot.from_model(input_prompt=input_prompt) + + +@basic_router.post("") +def create_input_prompt( + create_input_prompt_request: CreateInputPromptRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> InputPromptSnapshot: + input_prompt = insert_input_prompt( + prompt=create_input_prompt_request.prompt, + content=create_input_prompt_request.content, + is_public=create_input_prompt_request.is_public, + user=user, + db_session=db_session, + ) + return InputPromptSnapshot.from_model(input_prompt) + + +@basic_router.patch("/{input_prompt_id}") +def patch_input_prompt( + input_prompt_id: int, + update_input_prompt_request: UpdateInputPromptRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> InputPromptSnapshot: + try: + updated_input_prompt = update_input_prompt( + user=user, + input_prompt_id=input_prompt_id, + prompt=update_input_prompt_request.prompt, + content=update_input_prompt_request.content, + active=update_input_prompt_request.active, + db_session=db_session, + ) + except ValueError as e: + error_msg = "Error occurred while updating input prompt" + logger.warn(f"{error_msg}. Stack trace: {e}") + raise HTTPException(status_code=404, detail=error_msg) + + return InputPromptSnapshot.from_model(updated_input_prompt) + + +@basic_router.delete("/{input_prompt_id}") +def delete_input_prompt( + input_prompt_id: int, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + try: + remove_input_prompt(user, input_prompt_id, db_session) + + except ValueError as e: + error_msg = "Error occurred while deleting input prompt" + logger.warn(f"{error_msg}. Stack trace: {e}") + raise HTTPException(status_code=404, detail=error_msg) + + +@admin_router.delete("/{input_prompt_id}") +def delete_public_input_prompt( + input_prompt_id: int, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + try: + remove_public_input_prompt(input_prompt_id, db_session) + + except ValueError as e: + error_msg = "Error occurred while deleting input prompt" + logger.warn(f"{error_msg}. 
Stack trace: {e}") + raise HTTPException(status_code=404, detail=error_msg) + + +@admin_router.get("") +def list_public_input_prompts( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[InputPromptSnapshot]: + user_prompts = fetch_public_input_prompts( + db_session=db_session, + ) + return [InputPromptSnapshot.from_model(prompt) for prompt in user_prompts] diff --git a/backend/danswer/server/features/input_prompt/models.py b/backend/danswer/server/features/input_prompt/models.py new file mode 100644 index 00000000000..21ce2ba4e5b --- /dev/null +++ b/backend/danswer/server/features/input_prompt/models.py @@ -0,0 +1,47 @@ +from uuid import UUID + +from pydantic import BaseModel + +from danswer.db.models import InputPrompt +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class CreateInputPromptRequest(BaseModel): + prompt: str + content: str + is_public: bool + + +class UpdateInputPromptRequest(BaseModel): + prompt: str + content: str + active: bool + + +class InputPromptResponse(BaseModel): + id: int + prompt: str + content: str + active: bool + + +class InputPromptSnapshot(BaseModel): + id: int + prompt: str + content: str + active: bool + user_id: UUID | None + is_public: bool + + @classmethod + def from_model(cls, input_prompt: InputPrompt) -> "InputPromptSnapshot": + return InputPromptSnapshot( + id=input_prompt.id, + prompt=input_prompt.prompt, + content=input_prompt.content, + active=input_prompt.active, + user_id=input_prompt.user_id, + is_public=input_prompt.is_public, + ) diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py index 6739da46606..72b16d719ff 100644 --- a/backend/danswer/server/features/persona/api.py +++ b/backend/danswer/server/features/persona/api.py @@ -1,12 +1,17 @@ +import uuid from uuid import UUID from fastapi import APIRouter from fastapi import Depends +from fastapi import Query +from fastapi import UploadFile from pydantic import BaseModel from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user +from danswer.configs.constants import FileOrigin from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.persona import create_update_persona @@ -17,6 +22,8 @@ from danswer.db.persona import update_all_personas_display_priority from danswer.db.persona import update_persona_shared_users from danswer.db.persona import update_persona_visibility +from danswer.file_store.file_store import get_default_file_store +from danswer.file_store.models import ChatFileType from danswer.llm.answering.prompts.utils import build_dummy_prompt from danswer.server.features.persona.models import CreatePersonaRequest from danswer.server.features.persona.models import PersonaSnapshot @@ -24,6 +31,7 @@ from danswer.server.models import DisplayPriorityRequest from danswer.utils.logger import setup_logger + logger = setup_logger() @@ -39,13 +47,14 @@ class IsVisibleRequest(BaseModel): def patch_persona_visibility( persona_id: int, is_visible_request: IsVisibleRequest, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: update_persona_visibility( persona_id=persona_id, is_visible=is_visible_request.is_visible, db_session=db_session, + user=user, ) @@ -63,16 +72,19 @@ def 
patch_persona_display_priority( @admin_router.get("") def list_personas_admin( - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), include_deleted: bool = False, + get_editable: bool = Query(False, description="If true, return editable personas"), ) -> list[PersonaSnapshot]: return [ PersonaSnapshot.from_model(persona) for persona in get_personas( db_session=db_session, - user_id=None, # user_id = None -> give back all personas + user=user, + get_editable=get_editable, include_deleted=include_deleted, + joinedload_all=True, ) ] @@ -90,6 +102,26 @@ def undelete_persona( ) + +# used for assistant profile pictures +@admin_router.post("/upload-image") +def upload_file( + file: UploadFile, + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> dict[str, str]: + file_store = get_default_file_store(db_session) + file_type = ChatFileType.IMAGE + file_id = str(uuid.uuid4()) + file_store.save_file( + file_name=file_id, + content=file.file, + display_name=file.filename, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type=file.content_type or file_type.value, + ) + return {"file_id": file_id} + + """Endpoints for all""" @@ -160,11 +192,14 @@ def list_personas( db_session: Session = Depends(get_session), include_deleted: bool = False, ) -> list[PersonaSnapshot]: - user_id = user.id if user is not None else None return [ PersonaSnapshot.from_model(persona) for persona in get_personas( - user_id=user_id, include_deleted=include_deleted, db_session=db_session + user=user, + include_deleted=include_deleted, + db_session=db_session, + get_editable=False, + joinedload_all=True, ) ] diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py index aee39e72af0..777ef2037ee 100644 --- a/backend/danswer/server/features/persona/models.py +++ b/backend/danswer/server/features/persona/models.py @@ -1,6 +1,7 @@ from uuid import UUID from pydantic import BaseModel +from pydantic import Field from danswer.db.models import Persona from danswer.db.models import StarterMessage @@ -31,8 +32,12 @@ class CreatePersonaRequest(BaseModel): llm_model_version_override: str | None = None starter_messages: list[StarterMessage] | None = None # For Private Personas, who should be able to access these - users: list[UUID] | None = None - groups: list[int] | None = None + users: list[UUID] = Field(default_factory=list) + groups: list[int] = Field(default_factory=list) + icon_color: str | None = None + icon_shape: int | None = None + uploaded_image_id: str | None = None # New field for uploaded image + remove_image: bool | None = None class PersonaSnapshot(BaseModel): @@ -55,6 +60,9 @@ class PersonaSnapshot(BaseModel): document_sets: list[DocumentSet] users: list[MinimalUserSnapshot] groups: list[int] + icon_color: str | None + icon_shape: int | None + uploaded_image_id: str | None = None @classmethod def from_model( @@ -97,6 +105,9 @@ def from_model( for user in persona.users ], groups=[user_group.id for user_group in persona.groups], + icon_color=persona.icon_color, + icon_shape=persona.icon_shape, + uploaded_image_id=persona.uploaded_image_id, ) diff --git a/backend/danswer/server/features/tool/api.py b/backend/danswer/server/features/tool/api.py index b1f57a1a924..9635a276507 100644 --- a/backend/danswer/server/features/tool/api.py +++ b/backend/danswer/server/features/tool/api.py @@ -26,14 +26,14 @@ class CustomToolCreate(BaseModel): name: str 
- description: str | None + description: str | None = None definition: dict[str, Any] class CustomToolUpdate(BaseModel): - name: str | None - description: str | None - definition: dict[str, Any] | None + name: str | None = None + description: str | None = None + definition: dict[str, Any] | None = None def _validate_tool_definition(definition: dict[str, Any]) -> None: diff --git a/backend/danswer/server/gpts/api.py b/backend/danswer/server/gpts/api.py index a3ce59edc37..1bebc3bfc1e 100644 --- a/backend/danswer/server/gpts/api.py +++ b/backend/danswer/server/gpts/api.py @@ -7,6 +7,7 @@ from sqlalchemy.orm import Session from danswer.db.engine import get_session +from danswer.db.models import User from danswer.llm.factory import get_default_llms from danswer.search.models import SearchRequest from danswer.search.pipeline import SearchPipeline @@ -64,7 +65,7 @@ class GptSearchResponse(BaseModel): @router.post("/gpt-document-search") def gpt_search( search_request: GptSearchRequest, - _: str | None = Depends(api_key_dep), + _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> GptSearchResponse: llm, fast_llm = get_default_llms() diff --git a/backend/danswer/server/manage/administrative.py b/backend/danswer/server/manage/administrative.py index d6a52917f3b..0ac90ba8d11 100644 --- a/backend/danswer/server/manage/administrative.py +++ b/backend/danswer/server/manage/administrative.py @@ -9,15 +9,21 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_GEN_AI_KEY_CHECK_TIME from danswer.db.connector_credential_pair import get_connector_credential_pair +from danswer.db.connector_credential_pair import ( + update_connector_credential_pair_from_id, +) from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.engine import get_session +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.feedback import fetch_docs_ranked_by_boost from danswer.db.feedback import update_document_boost from danswer.db.feedback import update_document_hidden -from danswer.db.index_attempt import cancel_indexing_attempts_for_connector +from danswer.db.index_attempt import cancel_indexing_attempts_for_ccpair from danswer.db.models import User from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index @@ -30,13 +36,12 @@ from danswer.server.manage.models import BoostDoc from danswer.server.manage.models import BoostUpdateRequest from danswer.server.manage.models import HiddenUpdateRequest +from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger router = APIRouter(prefix="/manage") logger = setup_logger() -GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" - """Admin only API endpoints""" @@ -44,11 +49,14 @@ def get_most_boosted_docs( ascending: bool, limit: int, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> list[BoostDoc]: boost_docs = fetch_docs_ranked_by_boost( - ascending=ascending, limit=limit, db_session=db_session + ascending=ascending, + limit=limit, + db_session=db_session, + user=user, ) return [ BoostDoc( @@ -66,45 +74,43 @@ 
def get_most_boosted_docs( @router.post("/admin/doc-boosts") def document_boost_update( boost_update: BoostUpdateRequest, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> None: +) -> StatusResponse: curr_ind_name, sec_ind_name = get_both_index_names(db_session) document_index = get_default_document_index( primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name ) - try: - update_document_boost( - db_session=db_session, - document_id=boost_update.document_id, - boost=boost_update.boost, - document_index=document_index, - ) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + update_document_boost( + db_session=db_session, + document_id=boost_update.document_id, + boost=boost_update.boost, + document_index=document_index, + user=user, + ) + return StatusResponse(success=True, message="Updated document boost") @router.post("/admin/doc-hidden") def document_hidden_update( hidden_update: HiddenUpdateRequest, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> None: +) -> StatusResponse: curr_ind_name, sec_ind_name = get_both_index_names(db_session) document_index = get_default_document_index( primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name ) - try: - update_document_hidden( - db_session=db_session, - document_id=hidden_update.document_id, - hidden=hidden_update.hidden, - document_index=document_index, - ) - except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + update_document_hidden( + db_session=db_session, + document_id=hidden_update.document_id, + hidden=hidden_update.hidden, + document_index=document_index, + user=user, + ) + return StatusResponse(success=True, message="Updated document hidden status") @router.get("/admin/genai-api-key/validate") @@ -116,7 +122,7 @@ def validate_existing_genai_api_key( curr_time = datetime.now(tz=timezone.utc) try: last_check = datetime.fromtimestamp( - cast(float, kv_store.load(GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc + cast(float, kv_store.load(KV_GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc ) check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ) if curr_time - last_check < check_freq_sec: @@ -136,13 +142,13 @@ def validate_existing_genai_api_key( # Mark check as successful curr_time = datetime.now(tz=timezone.utc) - kv_store.store(GEN_AI_KEY_CHECK_TIME, curr_time.timestamp()) + kv_store.store(KV_GEN_AI_KEY_CHECK_TIME, curr_time.timestamp()) @router.post("/admin/deletion-attempt") def create_deletion_attempt_for_connector_id( connector_credential_pair_identifier: ConnectorCredentialPairIdentifier, - _: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: from danswer.background.celery.celery_app import ( @@ -156,6 +162,8 @@ def create_deletion_attempt_for_connector_id( db_session=db_session, connector_id=connector_id, credential_id=credential_id, + user=user, + get_editable=True, ) if cc_pair is None: raise HTTPException( @@ -165,8 +173,8 @@ def create_deletion_attempt_for_connector_id( ) # Cancel any scheduled indexing attempts - cancel_indexing_attempts_for_connector( - connector_id=connector_id, db_session=db_session, include_secondary_index=True + cancel_indexing_attempts_for_ccpair( + cc_pair_id=cc_pair.id, db_session=db_session,
include_secondary_index=True ) # Check if the deletion attempt should be allowed @@ -179,6 +187,13 @@ def create_deletion_attempt_for_connector_id( detail=deletion_attempt_disallowed_reason, ) + # mark as deleting + update_connector_credential_pair_from_id( + db_session=db_session, + cc_pair_id=cc_pair.id, + status=ConnectorCredentialPairStatus.DELETING, + ) + # actually kick off the deletion cleanup_connector_credential_pair_task.apply_async( kwargs=dict(connector_id=connector_id, credential_id=credential_id), ) @@ -186,5 +201,5 @@ def create_deletion_attempt_for_connector_id( if cc_pair.connector.source == DocumentSource.FILE: connector = cc_pair.connector file_store = get_default_file_store(db_session) - for file_name in connector.connector_specific_config["file_locations"]: + for file_name in connector.connector_specific_config.get("file_locations", []): file_store.delete_file(file_name) diff --git a/backend/danswer/server/manage/embedding/api.py b/backend/danswer/server/manage/embedding/api.py new file mode 100644 index 00000000000..90fa69401c2 --- /dev/null +++ b/backend/danswer/server/manage/embedding/api.py @@ -0,0 +1,94 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from danswer.auth.users import current_admin_user +from danswer.db.engine import get_session +from danswer.db.llm import fetch_existing_embedding_providers +from danswer.db.llm import remove_embedding_provider +from danswer.db.llm import upsert_cloud_embedding_provider +from danswer.db.models import User +from danswer.db.search_settings import get_current_db_embedding_provider +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.server.manage.embedding.models import CloudEmbeddingProvider +from danswer.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest +from danswer.server.manage.embedding.models import TestEmbeddingRequest +from danswer.utils.logger import setup_logger +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import EmbedTextType + +logger = setup_logger() + + +admin_router = APIRouter(prefix="/admin/embedding") +basic_router = APIRouter(prefix="/embedding") + + +@admin_router.post("/test-embedding") +def test_embedding_configuration( + test_llm_request: TestEmbeddingRequest, + _: User | None = Depends(current_admin_user), +) -> None: + try: + test_model = EmbeddingModel( + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + api_key=test_llm_request.api_key, + provider_type=test_llm_request.provider_type, + normalize=False, + query_prefix=None, + passage_prefix=None, + model_name=None, + ) + test_model.encode(["Testing Embedding"], text_type=EmbedTextType.QUERY) + + except ValueError as e: + error_msg = f"Not a valid embedding model. Exception thrown: {e}" + logger.error(error_msg) + raise ValueError(error_msg) + + except Exception as e: + error_msg = "An error occurred while testing your embedding model. Please check your configuration." 
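# --- Illustrative sketch of exercising the new embedding-provider admin endpoints; not
# part of the patch. Base URL, auth scheme, and the exact provider_type string
# ("openai" here) are assumptions.
import requests

BASE_URL = "http://localhost:8080/api"              # assumed
headers = {"Authorization": "Bearer <admin-token>"}  # assumed auth scheme

payload = {"provider_type": "openai", "api_key": "sk-..."}

# Validate the key first; a failed check is surfaced as an error response with the
# generic message shown in the handler above.
test = requests.post(f"{BASE_URL}/admin/embedding/test-embedding", json=payload, headers=headers)
test.raise_for_status()

# Then persist the provider via the upsert endpoint.
requests.put(f"{BASE_URL}/admin/embedding/embedding-provider", json=payload, headers=headers)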
+ logger.error(f"{error_msg} Error message: {e}", exc_info=True) + raise HTTPException(status_code=400, detail=error_msg) + + +@admin_router.get("/embedding-provider") +def list_embedding_providers( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[CloudEmbeddingProvider]: + return [ + CloudEmbeddingProvider.from_request(embedding_provider_model) + for embedding_provider_model in fetch_existing_embedding_providers(db_session) + ] + + +@admin_router.delete("/embedding-provider/{provider_type}") +def delete_embedding_provider( + provider_type: EmbeddingProvider, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + embedding_provider = get_current_db_embedding_provider(db_session=db_session) + if ( + embedding_provider is not None + and provider_type == embedding_provider.provider_type + ): + raise HTTPException( + status_code=400, detail="You can't delete a currently active model" + ) + + remove_embedding_provider(db_session, provider_type=provider_type) + + +@admin_router.put("/embedding-provider") +def put_cloud_embedding_provider( + provider: CloudEmbeddingProviderCreationRequest, + _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> CloudEmbeddingProvider: + return upsert_cloud_embedding_provider(db_session, provider) diff --git a/backend/danswer/server/manage/embedding/models.py b/backend/danswer/server/manage/embedding/models.py new file mode 100644 index 00000000000..132d311413c --- /dev/null +++ b/backend/danswer/server/manage/embedding/models.py @@ -0,0 +1,32 @@ +from typing import TYPE_CHECKING + +from pydantic import BaseModel + +from shared_configs.enums import EmbeddingProvider + +if TYPE_CHECKING: + from danswer.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel + + +class TestEmbeddingRequest(BaseModel): + provider_type: EmbeddingProvider + api_key: str | None = None + + +class CloudEmbeddingProvider(BaseModel): + provider_type: EmbeddingProvider + api_key: str | None = None + + @classmethod + def from_request( + cls, cloud_provider_model: "CloudEmbeddingProviderModel" + ) -> "CloudEmbeddingProvider": + return cls( + provider_type=cloud_provider_model.provider_type, + api_key=cloud_provider_model.api_key, + ) + + +class CloudEmbeddingProviderCreationRequest(BaseModel): + provider_type: EmbeddingProvider + api_key: str | None = None diff --git a/backend/danswer/server/manage/llm/api.py b/backend/danswer/server/manage/llm/api.py index 4df00b529af..9ea9fe927db 100644 --- a/backend/danswer/server/manage/llm/api.py +++ b/backend/danswer/server/manage/llm/api.py @@ -147,10 +147,10 @@ def set_provider_as_default( @basic_router.get("/provider") def list_llm_provider_basics( - _: User | None = Depends(current_user), + user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> list[LLMProviderDescriptor]: return [ LLMProviderDescriptor.from_model(llm_provider_model) - for llm_provider_model in fetch_existing_llm_providers(db_session) + for llm_provider_model in fetch_existing_llm_providers(db_session, user) ] diff --git a/backend/danswer/server/manage/llm/models.py b/backend/danswer/server/manage/llm/models.py index 05a596ffd54..3ef66971003 100644 --- a/backend/danswer/server/manage/llm/models.py +++ b/backend/danswer/server/manage/llm/models.py @@ -1,9 +1,11 @@ from typing import TYPE_CHECKING from pydantic import BaseModel +from pydantic import Field from 
danswer.llm.llm_provider_options import fetch_models_for_provider + if TYPE_CHECKING: from danswer.db.models import LLMProvider as LLMProviderModel @@ -31,6 +33,7 @@ class LLMProviderDescriptor(BaseModel): default_model_name: str fast_default_model_name: str | None is_default_provider: bool | None + display_model_names: list[str] | None @classmethod def from_model( @@ -47,29 +50,33 @@ def from_model( or fetch_models_for_provider(llm_provider_model.provider) or [llm_provider_model.default_model_name] ), + display_model_names=llm_provider_model.display_model_names, ) class LLMProvider(BaseModel): name: str provider: str - api_key: str | None - api_base: str | None - api_version: str | None - custom_config: dict[str, str] | None + api_key: str | None = None + api_base: str | None = None + api_version: str | None = None + custom_config: dict[str, str] | None = None default_model_name: str - fast_default_model_name: str | None + fast_default_model_name: str | None = None + is_public: bool = True + groups: list[int] = Field(default_factory=list) + display_model_names: list[str] | None = None class LLMProviderUpsertRequest(LLMProvider): # should only be used for a "custom" provider # for default providers, the built-in model names are used - model_names: list[str] | None + model_names: list[str] | None = None class FullLLMProvider(LLMProvider): id: int - is_default_provider: bool | None + is_default_provider: bool | None = None model_names: list[str] @classmethod @@ -85,9 +92,12 @@ def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider" default_model_name=llm_provider_model.default_model_name, fast_default_model_name=llm_provider_model.fast_default_model_name, is_default_provider=llm_provider_model.is_default_provider, + display_model_names=llm_provider_model.display_model_names, model_names=( llm_provider_model.model_names or fetch_models_for_provider(llm_provider_model.provider) or [llm_provider_model.default_model_name] ), + is_public=llm_provider_model.is_public, + groups=[group.id for group in llm_provider_model.groups], ) diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index f544df0f24e..160c90bdb78 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -1,11 +1,14 @@ -from typing import Any +from datetime import datetime from typing import TYPE_CHECKING from pydantic import BaseModel -from pydantic import root_validator -from pydantic import validator +from pydantic import ConfigDict +from pydantic import Field +from pydantic import field_validator +from pydantic import model_validator from danswer.auth.schemas import UserRole +from danswer.configs.app_configs import TRACK_EXTERNAL_IDP_EXPIRY from danswer.configs.constants import AuthType from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS from danswer.db.models import AllowedAnswerFilters @@ -14,13 +17,15 @@ from danswer.db.models import SlackBotResponseType from danswer.db.models import StandardAnswer as StandardAnswerModel from danswer.db.models import StandardAnswerCategory as StandardAnswerCategoryModel -from danswer.indexing.models import EmbeddingModelDetail +from danswer.db.models import User +from danswer.search.models import SavedSearchSettings from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.models import FullUserSnapshot from danswer.server.models import InvitedUserSnapshot + if TYPE_CHECKING: - from danswer.db.models import User as UserModel + pass class 
VersionResponse(BaseModel): @@ -35,7 +40,8 @@ class AuthTypeResponse(BaseModel): class UserPreferences(BaseModel): - chosen_assistants: list[int] | None + chosen_assistants: list[int] | None = None + default_model: str | None = None class UserInfo(BaseModel): @@ -46,9 +52,17 @@ class UserInfo(BaseModel): is_verified: bool role: UserRole preferences: UserPreferences + oidc_expiry: datetime | None = None + current_token_created_at: datetime | None = None + current_token_expiry_length: int | None = None @classmethod - def from_model(cls, user: "UserModel") -> "UserInfo": + def from_model( + cls, + user: User, + current_token_created_at: datetime | None = None, + expiry_length: int | None = None, + ) -> "UserInfo": return cls( id=str(user.id), email=user.email, @@ -56,7 +70,19 @@ def from_model(cls, user: "UserModel") -> "UserInfo": is_superuser=user.is_superuser, is_verified=user.is_verified, role=user.role, - preferences=(UserPreferences(chosen_assistants=user.chosen_assistants)), + preferences=( + UserPreferences( + chosen_assistants=user.chosen_assistants, + default_model=user.default_model, + ) + ), + # set to None if TRACK_EXTERNAL_IDP_EXPIRY is False so that we avoid cases + # where they previously had this set + used OIDC, and now they switched to + # basic auth are now constantly getting redirected back to the login page + # since their "oidc_expiry is old" + oidc_expiry=user.oidc_expiry if TRACK_EXTERNAL_IDP_EXPIRY else None, + current_token_created_at=current_token_created_at, + current_token_expiry_length=expiry_length, ) @@ -64,6 +90,11 @@ class UserByEmail(BaseModel): user_email: str +class UserRoleUpdateRequest(BaseModel): + user_email: str + new_role: UserRole + + class UserRoleResponse(BaseModel): role: str @@ -128,7 +159,8 @@ class StandardAnswerCreationRequest(BaseModel): answer: str categories: list[int] - @validator("categories", pre=True) + @field_validator("categories", mode="before") + @classmethod def validate_categories(cls, value: list[int]) -> list[int]: if len(value) < 1: raise ValueError( @@ -140,9 +172,7 @@ def validate_categories(cls, value: list[int]) -> list[int]: class SlackBotTokens(BaseModel): bot_token: str app_token: str - - class Config: - frozen = True + model_config = ConfigDict(frozen=True) class SlackBotConfigCreationRequest(BaseModel): @@ -150,22 +180,24 @@ class SlackBotConfigCreationRequest(BaseModel): # in the future, `document_sets` will probably be replaced # by an optional `PersonaSnapshot` object. 
Keeping it like this # for now for simplicity / speed of development - document_sets: list[int] | None - persona_id: int | None # NOTE: only one of `document_sets` / `persona_id` should be set + document_sets: list[int] | None = None + persona_id: ( + int | None + ) = None # NOTE: only one of `document_sets` / `persona_id` should be set channel_names: list[str] respond_tag_only: bool = False respond_to_bots: bool = False enable_auto_filters: bool = False # If no team members, assume respond in the channel to everyone - respond_team_member_list: list[str] = [] - respond_slack_group_list: list[str] = [] - answer_filters: list[AllowedAnswerFilters] = [] + respond_member_group_list: list[str] = Field(default_factory=list) + answer_filters: list[AllowedAnswerFilters] = Field(default_factory=list) # list of user emails follow_up_tags: list[str] | None = None response_type: SlackBotResponseType - standard_answer_categories: list[int] = [] + standard_answer_categories: list[int] = Field(default_factory=list) - @validator("answer_filters", pre=True) + @field_validator("answer_filters", mode="before") + @classmethod def validate_filters(cls, value: list[str]) -> list[str]: if any(test not in VALID_SLACK_FILTERS for test in value): raise ValueError( @@ -173,14 +205,12 @@ def validate_filters(cls, value: list[str]) -> list[str]: ) return value - @root_validator - def validate_document_sets_and_persona_id( - cls, values: dict[str, Any] - ) -> dict[str, Any]: - if values.get("document_sets") and values.get("persona_id"): + @model_validator(mode="after") + def validate_document_sets_and_persona_id(self) -> "SlackBotConfigCreationRequest": + if self.document_sets and self.persona_id: raise ValueError("Only one of `document_sets` / `persona_id` should be set") - return values + return self class SlackBotConfig(BaseModel): @@ -215,8 +245,8 @@ def from_model( class FullModelVersionResponse(BaseModel): - current_model: EmbeddingModelDetail - secondary_model: EmbeddingModelDetail | None + current_settings: SavedSearchSettings + secondary_settings: SavedSearchSettings | None class AllUsersResponse(BaseModel): diff --git a/backend/danswer/server/manage/search_settings.py b/backend/danswer/server/manage/search_settings.py new file mode 100644 index 00000000000..db483eff5da --- /dev/null +++ b/backend/danswer/server/manage/search_settings.py @@ -0,0 +1,180 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from fastapi import status +from sqlalchemy.orm import Session + +from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user +from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.connector_credential_pair import resync_cc_pair +from danswer.db.engine import get_session +from danswer.db.index_attempt import expire_index_attempts +from danswer.db.models import IndexModelStatus +from danswer.db.models import User +from danswer.db.search_settings import create_search_settings +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_embedding_provider_from_provider_type +from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.search_settings import update_current_search_settings +from danswer.db.search_settings import update_search_settings_status +from danswer.document_index.factory import get_default_document_index +from 
danswer.natural_language_processing.search_nlp_models import clean_model_name +from danswer.search.models import SavedSearchSettings +from danswer.search.models import SearchSettingsCreationRequest +from danswer.server.manage.models import FullModelVersionResponse +from danswer.server.models import IdReturn +from danswer.utils.logger import setup_logger +from shared_configs.configs import ALT_INDEX_SUFFIX + + +router = APIRouter(prefix="/search-settings") +logger = setup_logger() + + +@router.post("/set-new-search-settings") +def set_new_search_settings( + search_settings_new: SearchSettingsCreationRequest, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> IdReturn: + """Creates a new EmbeddingModel row and cancels the previous secondary indexing if any + Gives an error if the same model name is used as the current or secondary index + """ + if search_settings_new.index_name: + logger.warning("Index name was specified by request, this is not suggested") + + # Validate cloud provider exists + if search_settings_new.provider_type is not None: + cloud_provider = get_embedding_provider_from_provider_type( + db_session, provider_type=search_settings_new.provider_type + ) + + if cloud_provider is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}", + ) + + search_settings = get_current_search_settings(db_session) + + if search_settings_new.index_name is None: + # We define index name here + index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}" + if ( + search_settings_new.model_name == search_settings.model_name + and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX) + ): + index_name += ALT_INDEX_SUFFIX + search_values = search_settings_new.dict() + search_values["index_name"] = index_name + new_search_settings_request = SavedSearchSettings(**search_values) + else: + new_search_settings_request = SavedSearchSettings(**search_settings_new.dict()) + + secondary_search_settings = get_secondary_search_settings(db_session) + + if secondary_search_settings: + # Cancel any background indexing jobs + expire_index_attempts( + search_settings_id=secondary_search_settings.id, db_session=db_session + ) + + # Mark previous model as a past model directly + update_search_settings_status( + search_settings=secondary_search_settings, + new_status=IndexModelStatus.PAST, + db_session=db_session, + ) + + new_search_settings = create_search_settings( + search_settings=new_search_settings_request, db_session=db_session + ) + + # Ensure Vespa has the new index immediately + document_index = get_default_document_index( + primary_index_name=search_settings.index_name, + secondary_index_name=new_search_settings.index_name, + ) + document_index.ensure_indices_exist( + index_embedding_dim=search_settings.model_dim, + secondary_index_embedding_dim=new_search_settings.model_dim, + ) + + # Pause index attempts for the currently in use index to preserve resources + if DISABLE_INDEX_UPDATE_ON_SWAP: + expire_index_attempts( + search_settings_id=search_settings.id, db_session=db_session + ) + for cc_pair in get_connector_credential_pairs(db_session): + resync_cc_pair(cc_pair, db_session=db_session) + + return IdReturn(id=new_search_settings.id) + + +@router.post("/cancel-new-embedding") +def cancel_new_embedding( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + 
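# --- Minimal sketch of the index-name derivation used in set_new_search_settings above;
# illustrative only. The real clean_model_name and ALT_INDEX_SUFFIX live in the imported
# modules; the behaviour and suffix value shown here are assumptions.
ALT_INDEX_SUFFIX = "__danswer_alt_index"  # assumed value for illustration

def clean_model_name(model_name: str) -> str:
    # assumed behaviour: normalize the model name so it is safe inside an index name
    return model_name.replace("/", "_").replace("-", "_").replace(".", "_")

def derive_index_name(new_model: str, current_model: str, current_index: str) -> str:
    index_name = f"danswer_chunk_{clean_model_name(new_model)}"
    # Re-indexing with the *same* model flips to the alternate index so the old and
    # new indices can coexist during the switchover.
    if new_model == current_model and not current_index.endswith(ALT_INDEX_SUFFIX):
        index_name += ALT_INDEX_SUFFIX
    return index_name

# e.g. derive_index_name("intfloat/e5-base-v2", "intfloat/e5-base-v2",
#                        "danswer_chunk_intfloat_e5_base_v2")
# -> "danswer_chunk_intfloat_e5_base_v2__danswer_alt_index"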
secondary_search_settings = get_secondary_search_settings(db_session) + + if secondary_search_settings: + expire_index_attempts( + search_settings_id=secondary_search_settings.id, db_session=db_session + ) + + update_search_settings_status( + search_settings=secondary_search_settings, + new_status=IndexModelStatus.PAST, + db_session=db_session, + ) + + +@router.get("/get-current-search-settings") +def get_curr_search_settings( + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> SavedSearchSettings: + current_search_settings = get_current_search_settings(db_session) + return SavedSearchSettings.from_db_model(current_search_settings) + + +@router.get("/get-secondary-search-settings") +def get_sec_search_settings( + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> SavedSearchSettings | None: + secondary_search_settings = get_secondary_search_settings(db_session) + if not secondary_search_settings: + return None + + return SavedSearchSettings.from_db_model(secondary_search_settings) + + +@router.get("/get-all-search-settings") +def get_all_search_settings( + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> FullModelVersionResponse: + current_search_settings = get_current_search_settings(db_session) + secondary_search_settings = get_secondary_search_settings(db_session) + return FullModelVersionResponse( + current_settings=SavedSearchSettings.from_db_model(current_search_settings), + secondary_settings=SavedSearchSettings.from_db_model(secondary_search_settings) + if secondary_search_settings + else None, + ) + + +# Updates current non-reindex search settings +@router.post("/update-inference-settings") +def update_saved_search_settings( + search_settings: SavedSearchSettings, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + update_current_search_settings( + search_settings=search_settings, db_session=db_session + ) diff --git a/backend/danswer/server/manage/secondary_index.py b/backend/danswer/server/manage/secondary_index.py deleted file mode 100644 index 6f5adf752f6..00000000000 --- a/backend/danswer/server/manage/secondary_index.py +++ /dev/null @@ -1,146 +0,0 @@ -from fastapi import APIRouter -from fastapi import Depends -from fastapi import HTTPException -from fastapi import status -from sqlalchemy.orm import Session - -from danswer.auth.users import current_admin_user -from danswer.auth.users import current_user -from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP -from danswer.db.connector_credential_pair import get_connector_credential_pairs -from danswer.db.connector_credential_pair import resync_cc_pair -from danswer.db.embedding_model import create_embedding_model -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model -from danswer.db.embedding_model import update_embedding_model_status -from danswer.db.engine import get_session -from danswer.db.index_attempt import expire_index_attempts -from danswer.db.models import IndexModelStatus -from danswer.db.models import User -from danswer.document_index.factory import get_default_document_index -from danswer.indexing.models import EmbeddingModelDetail -from danswer.server.manage.models import FullModelVersionResponse -from danswer.server.models import IdReturn -from danswer.utils.logger import setup_logger - -router = 
APIRouter(prefix="/secondary-index") -logger = setup_logger() - - -@router.post("/set-new-embedding-model") -def set_new_embedding_model( - embed_model_details: EmbeddingModelDetail, - _: User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> IdReturn: - """Creates a new EmbeddingModel row and cancels the previous secondary indexing if any - Gives an error if the same model name is used as the current or secondary index - """ - current_model = get_current_db_embedding_model(db_session) - - if embed_model_details.model_name == current_model.model_name: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="New embedding model is the same as the currently active one.", - ) - - secondary_model = get_secondary_db_embedding_model(db_session) - - if secondary_model: - if embed_model_details.model_name == secondary_model.model_name: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=f"Already reindexing with {secondary_model.model_name}", - ) - - # Cancel any background indexing jobs - expire_index_attempts( - embedding_model_id=secondary_model.id, db_session=db_session - ) - - # Mark previous model as a past model directly - update_embedding_model_status( - embedding_model=secondary_model, - new_status=IndexModelStatus.PAST, - db_session=db_session, - ) - - new_model = create_embedding_model( - model_details=embed_model_details, - db_session=db_session, - ) - - # Ensure Vespa has the new index immediately - document_index = get_default_document_index( - primary_index_name=current_model.index_name, - secondary_index_name=new_model.index_name, - ) - document_index.ensure_indices_exist( - index_embedding_dim=current_model.model_dim, - secondary_index_embedding_dim=new_model.model_dim, - ) - - # Pause index attempts for the currently in use index to preserve resources - if DISABLE_INDEX_UPDATE_ON_SWAP: - expire_index_attempts( - embedding_model_id=current_model.id, db_session=db_session - ) - for cc_pair in get_connector_credential_pairs(db_session): - resync_cc_pair(cc_pair, db_session=db_session) - - return IdReturn(id=new_model.id) - - -@router.post("/cancel-new-embedding") -def cancel_new_embedding( - _: User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> None: - secondary_model = get_secondary_db_embedding_model(db_session) - - if secondary_model: - expire_index_attempts( - embedding_model_id=secondary_model.id, db_session=db_session - ) - - update_embedding_model_status( - embedding_model=secondary_model, - new_status=IndexModelStatus.PAST, - db_session=db_session, - ) - - -@router.get("/get-current-embedding-model") -def get_current_embedding_model( - _: User | None = Depends(current_user), - db_session: Session = Depends(get_session), -) -> EmbeddingModelDetail: - current_model = get_current_db_embedding_model(db_session) - return EmbeddingModelDetail.from_model(current_model) - - -@router.get("/get-secondary-embedding-model") -def get_secondary_embedding_model( - _: User | None = Depends(current_user), - db_session: Session = Depends(get_session), -) -> EmbeddingModelDetail | None: - next_model = get_secondary_db_embedding_model(db_session) - if not next_model: - return None - - return EmbeddingModelDetail.from_model(next_model) - - -@router.get("/get-embedding-models") -def get_embedding_models( - _: User | None = Depends(current_user), - db_session: Session = Depends(get_session), -) -> FullModelVersionResponse: - current_model = 
get_current_db_embedding_model(db_session) - next_model = get_secondary_db_embedding_model(db_session) - return FullModelVersionResponse( - current_model=EmbeddingModelDetail.from_model(current_model), - secondary_model=EmbeddingModelDetail.from_model(next_model) - if next_model - else None, - ) diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py index d5f08e2694a..0fb1459072b 100644 --- a/backend/danswer/server/manage/slack_bot.py +++ b/backend/danswer/server/manage/slack_bot.py @@ -34,11 +34,8 @@ def _form_channel_config( ) -> ChannelConfig: raw_channel_names = slack_bot_config_creation_request.channel_names respond_tag_only = slack_bot_config_creation_request.respond_tag_only - respond_team_member_list = ( - slack_bot_config_creation_request.respond_team_member_list - ) - respond_slack_group_list = ( - slack_bot_config_creation_request.respond_slack_group_list + respond_member_group_list = ( + slack_bot_config_creation_request.respond_member_group_list ) answer_filters = slack_bot_config_creation_request.answer_filters follow_up_tags = slack_bot_config_creation_request.follow_up_tags @@ -61,7 +58,7 @@ def _form_channel_config( detail=str(e), ) - if respond_tag_only and (respond_team_member_list or respond_slack_group_list): + if respond_tag_only and respond_member_group_list: raise ValueError( "Cannot set DanswerBot to only respond to tags only and " "also respond to a predetermined set of users." @@ -72,10 +69,8 @@ def _form_channel_config( } if respond_tag_only is not None: channel_config["respond_tag_only"] = respond_tag_only - if respond_team_member_list: - channel_config["respond_team_member_list"] = respond_team_member_list - if respond_slack_group_list: - channel_config["respond_slack_group_list"] = respond_slack_group_list + if respond_member_group_list: + channel_config["respond_member_group_list"] = respond_member_group_list if answer_filters: channel_config["answer_filters"] = answer_filters if follow_up_tags is not None: @@ -166,6 +161,7 @@ def patch_slack_bot_config( channel_names=channel_config["channel_names"], document_set_ids=slack_bot_config_creation_request.document_sets, existing_persona_id=existing_persona_id, + enable_auto_filters=slack_bot_config_creation_request.enable_auto_filters, ).id slack_bot_config_model = update_slack_bot_config( diff --git a/backend/danswer/server/manage/users.py b/backend/danswer/server/manage/users.py index c635469919e..d2fd981b5b5 100644 --- a/backend/danswer/server/manage/users.py +++ b/backend/danswer/server/manage/users.py @@ -1,11 +1,17 @@ import re +from datetime import datetime +from datetime import timezone +from email_validator import validate_email from fastapi import APIRouter from fastapi import Body from fastapi import Depends from fastapi import HTTPException from fastapi import status from pydantic import BaseModel +from sqlalchemy import Column +from sqlalchemy import desc +from sqlalchemy import select from sqlalchemy import update from sqlalchemy.orm import Session @@ -16,12 +22,15 @@ from danswer.auth.schemas import UserRole from danswer.auth.schemas import UserStatus from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.auth.users import optional_user from danswer.configs.app_configs import AUTH_TYPE +from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS from danswer.configs.app_configs import VALID_EMAIL_DOMAINS from 
danswer.configs.constants import AuthType from danswer.db.engine import get_session +from danswer.db.models import AccessToken from danswer.db.models import User from danswer.db.users import get_user_by_email from danswer.db.users import list_users @@ -30,11 +39,13 @@ from danswer.server.manage.models import UserByEmail from danswer.server.manage.models import UserInfo from danswer.server.manage.models import UserRoleResponse +from danswer.server.manage.models import UserRoleUpdateRequest from danswer.server.models import FullUserSnapshot from danswer.server.models import InvitedUserSnapshot from danswer.server.models import MinimalUserSnapshot from danswer.utils.logger import setup_logger from ee.danswer.db.api_key import is_api_key_email_address +from ee.danswer.db.user_group import remove_curator_status__no_commit logger = setup_logger() @@ -44,42 +55,38 @@ USERS_PAGE_SIZE = 10 -@router.patch("/manage/promote-user-to-admin") -def promote_admin( - user_email: UserByEmail, - _: User = Depends(current_admin_user), +@router.patch("/manage/set-user-role") +def set_user_role( + user_role_update_request: UserRoleUpdateRequest, + current_user: User = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> None: - user_to_promote = get_user_by_email( - email=user_email.user_email, db_session=db_session + user_to_update = get_user_by_email( + email=user_role_update_request.user_email, db_session=db_session ) - if not user_to_promote: + if not user_to_update: raise HTTPException(status_code=404, detail="User not found") - user_to_promote.role = UserRole.ADMIN - db_session.add(user_to_promote) - db_session.commit() + if user_role_update_request.new_role == UserRole.CURATOR: + raise HTTPException( + status_code=400, + detail="Curator role must be set via the User Group Menu", + ) + if user_to_update.role == user_role_update_request.new_role: + return -@router.patch("/manage/demote-admin-to-basic") -async def demote_admin( - user_email: UserByEmail, - user: User = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> None: - user_to_demote = get_user_by_email( - email=user_email.user_email, db_session=db_session - ) - if not user_to_demote: - raise HTTPException(status_code=404, detail="User not found") - - if user_to_demote.id == user.id: + if current_user.id == user_to_update.id: raise HTTPException( - status_code=400, detail="Cannot demote yourself from admin role!" 
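# --- Illustrative sketch of the single set-user-role endpoint that replaces the separate
# promote/demote routes; not part of the patch. Base URL and auth are assumptions, and the
# role strings mirror UserRole values (e.g. "admin", "basic") but are not confirmed here.
import requests

BASE_URL = "http://localhost:8080/api"  # assumed
session = requests.Session()            # assumed to carry an admin session cookie

resp = session.patch(
    f"{BASE_URL}/manage/set-user-role",
    json={"user_email": "jane@example.com", "new_role": "admin"},
)
# Expected failure modes per the handler above: 404 for an unknown email, 400 when
# targeting the curator role (must be set via the User Group menu) or when an admin
# tries to demote themselves; a no-op role change simply returns.
resp.raise_for_status()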
+ status_code=400, + detail="An admin cannot demote themselves from admin role!", ) - user_to_demote.role = UserRole.BASIC - db_session.add(user_to_demote) + if user_to_update.role == UserRole.CURATOR: + remove_curator_status__no_commit(db_session, user_to_update) + + user_to_update.role = user_role_update_request.new_role.value + db_session.commit() @@ -88,7 +95,7 @@ def list_all_users( q: str | None = None, accepted_page: int | None = None, invited_page: int | None = None, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> AllUsersResponse: if not q: @@ -96,7 +103,7 @@ def list_all_users( users = [ user - for user in list_users(db_session, q=q) + for user in list_users(db_session, email_filter_string=q, user=user) if not is_api_key_email_address(user.email) ] accepted_emails = {user.email for user in users} @@ -117,9 +124,9 @@ def list_all_users( id=user.id, email=user.email, role=user.role, - status=UserStatus.LIVE - if user.is_active - else UserStatus.DEACTIVATED, + status=( + UserStatus.LIVE if user.is_active else UserStatus.DEACTIVATED + ), ) for user in users ], @@ -152,12 +159,18 @@ def bulk_invite_users( emails: list[str] = Body(..., embed=True), current_user: User | None = Depends(current_admin_user), ) -> int: + """emails are string validated. If any email fails validation, no emails are + invited and an exception is raised.""" if current_user is None: raise HTTPException( status_code=400, detail="Auth is disabled, cannot invite users" ) - all_emails = list(set(emails) | set(get_invited_users())) + normalized_emails = [] + for email in emails: + email_info = validate_email(email) # can raise EmailNotValidError + normalized_emails.append(email_info.normalized) # type: ignore + all_emails = list(set(normalized_emails) | set(get_invited_users())) return write_invited_users(all_emails) @@ -246,9 +259,35 @@ async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse: return UserRoleResponse(role=user.role) +def get_current_token_creation( + user: User | None, db_session: Session +) -> datetime | None: + if user is None: + return None + try: + result = db_session.execute( + select(AccessToken) + .where(AccessToken.user_id == user.id) # type: ignore + .order_by(desc(Column("created_at"))) + .limit(1) + ) + access_token = result.scalar_one_or_none() + + if access_token: + return access_token.created_at + else: + logger.error("No AccessToken found for user") + return None + + except Exception as e: + logger.error(f"Error fetching AccessToken: {e}") + return None + + @router.get("/me") def verify_user_logged_in( user: User | None = Depends(optional_user), + db_session: Session = Depends(get_session), ) -> UserInfo: # NOTE: this does not use `current_user` / `current_admin_user` because we don't want # to enforce user verification here - the frontend always wants to get the info about @@ -264,12 +303,53 @@ def verify_user_logged_in( status_code=status.HTTP_403_FORBIDDEN, detail="User Not Authenticated" ) - return UserInfo.from_model(user) + if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied. 
User's OIDC token has expired.", + ) + + token_created_at = get_current_token_creation(user, db_session) + user_info = UserInfo.from_model( + user, + current_token_created_at=token_created_at, + expiry_length=SESSION_EXPIRE_TIME_SECONDS, + ) + + return user_info """APIs to adjust user preferences""" +class ChosenDefaultModelRequest(BaseModel): + default_model: str | None = None + + +@router.patch("/user/default-model") +def update_user_default_model( + request: ChosenDefaultModelRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + if user is None: + if AUTH_TYPE == AuthType.DISABLED: + store = get_dynamic_config_store() + no_auth_user = fetch_no_auth_user(store) + no_auth_user.preferences.default_model = request.default_model + set_no_auth_user_preferences(store, no_auth_user.preferences) + return + else: + raise RuntimeError("This should never happen") + + db_session.execute( + update(User) + .where(User.id == user.id) # type: ignore + .values(default_model=request.default_model) + ) + db_session.commit() + + class ChosenAssistantsRequest(BaseModel): chosen_assistants: list[int] diff --git a/backend/danswer/server/middleware/latency_logging.py b/backend/danswer/server/middleware/latency_logging.py index f2bc3127af4..ed269a545d1 100644 --- a/backend/danswer/server/middleware/latency_logging.py +++ b/backend/danswer/server/middleware/latency_logging.py @@ -16,7 +16,7 @@ async def log_latency( start_time = time.monotonic() response = await call_next(request) process_time = time.monotonic() - start_time - logger.info( + logger.debug( f"Path: {request.url.path} - Method: {request.method} - " f"Status Code: {response.status_code} - Time: {process_time:.4f} secs" ) diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index fa70189f11c..9c78851eb06 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -4,7 +4,6 @@ from uuid import UUID from pydantic import BaseModel -from pydantic.generics import GenericModel from danswer.auth.schemas import UserRole from danswer.auth.schemas import UserStatus @@ -13,7 +12,7 @@ DataT = TypeVar("DataT") -class StatusResponse(GenericModel, Generic[DataT]): +class StatusResponse(BaseModel, Generic[DataT]): success: bool message: Optional[str] = None data: Optional[DataT] = None diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 646660c9fac..a37758336a2 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -1,5 +1,8 @@ +import asyncio import io import uuid +from collections.abc import Callable +from collections.abc import Generator from fastapi import APIRouter from fastapi import Depends @@ -44,8 +47,9 @@ ) from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llms +from danswer.llm.factory import get_llms_for_persona from danswer.llm.headers import get_litellm_additional_request_headers -from danswer.llm.utils import get_default_llm_tokenizer +from danswer.natural_language_processing.utils import get_tokenizer from danswer.secondary_llm_flows.chat_session_naming import ( get_renamed_conversation_name, ) @@ -128,7 +132,6 @@ def get_chat_session( db_session: Session = Depends(get_session), ) -> ChatSessionDetailResponse: user_id = user.id if user is not None else None - try: chat_session = get_chat_session_by_id( chat_session_id=session_id, @@ 
-207,8 +210,6 @@ def rename_chat_session( chat_session_id = rename_req.chat_session_id user_id = user.id if user is not None else None - logger.info(f"Received rename request for chat session: {chat_session_id}") - if name: update_chat_session( db_session=db_session, @@ -271,19 +272,39 @@ def delete_chat_session_by_id( delete_chat_session(user_id, session_id, db_session) +async def is_disconnected(request: Request) -> Callable[[], bool]: + main_loop = asyncio.get_event_loop() + + def is_disconnected_sync() -> bool: + future = asyncio.run_coroutine_threadsafe(request.is_disconnected(), main_loop) + try: + return not future.result(timeout=0.01) + except asyncio.TimeoutError: + logger.error("Asyncio timed out") + return True + except Exception as e: + error_msg = str(e) + logger.critical( + f"An unexpected error occurred with the disconnect check coroutine: {error_msg}" + ) + return True + + return is_disconnected_sync + + @router.post("/send-message") def handle_new_chat_message( chat_message_req: CreateChatMessageRequest, request: Request, user: User | None = Depends(current_user), _: None = Depends(check_token_rate_limits), + is_disconnected_func: Callable[[], bool] = Depends(is_disconnected), ) -> StreamingResponse: """This endpoint is both used for all the following purposes: - Sending a new message in the session - Regenerating a message in the session (just send the same one again) - Editing a message (similar to regenerating but sending a different message) - Kicking off a seeded chat session (set `use_existing_user_message`) - To avoid extra overhead/latency, this assumes (and checks) that previous messages on the path have already been set as latest""" logger.debug(f"Received new chat message: {chat_message_req.message}") @@ -295,16 +316,26 @@ def handle_new_chat_message( ): raise HTTPException(status_code=400, detail="Empty chat message is invalid") - packets = stream_chat_message( - new_msg_req=chat_message_req, - user=user, - use_existing_user_message=chat_message_req.use_existing_user_message, - litellm_additional_headers=get_litellm_additional_request_headers( - request.headers - ), - ) + import json + + def stream_generator() -> Generator[str, None, None]: + try: + for packet in stream_chat_message( + new_msg_req=chat_message_req, + user=user, + use_existing_user_message=chat_message_req.use_existing_user_message, + litellm_additional_headers=get_litellm_additional_request_headers( + request.headers + ), + is_connected=is_disconnected_func, + ): + yield json.dumps(packet) if isinstance(packet, dict) else packet - return StreamingResponse(packets, media_type="application/json") + except Exception as e: + logger.exception(f"Error in chat message streaming: {e}") + yield json.dumps({"error": str(e)}) + + return StreamingResponse(stream_generator(), media_type="text/event-stream") @router.put("/set-message-as-latest") @@ -443,6 +474,14 @@ def seed_chat( root_message = get_or_create_root_message( chat_session_id=new_chat_session.id, db_session=db_session ) + llm, fast_llm = get_llms_for_persona(persona=new_chat_session.persona) + + tokenizer = get_tokenizer( + model_name=llm.config.model_name, + provider_type=llm.config.model_provider, + ) + token_count = len(tokenizer.encode(chat_seed_request.message)) + create_new_chat_message( chat_session_id=new_chat_session.id, parent_message=root_message, @@ -453,9 +492,7 @@ def seed_chat( else None ), message=chat_seed_request.message, - token_count=len( - get_default_llm_tokenizer().encode(chat_seed_request.message) - ), +
token_count=token_count, message_type=MessageType.USER, db_session=db_session, ) @@ -484,6 +521,7 @@ def upload_files_for_chat( "text/tab-separated-values", "application/json", "application/xml", + "text/xml", "application/x-yaml", } document_content_types = { diff --git a/backend/danswer/server/query_and_chat/models.py b/backend/danswer/server/query_and_chat/models.py index ea1ce1ff680..55d1094ea86 100644 --- a/backend/danswer/server/query_and_chat/models.py +++ b/backend/danswer/server/query_and_chat/models.py @@ -2,7 +2,7 @@ from typing import Any from pydantic import BaseModel -from pydantic import root_validator +from pydantic import model_validator from danswer.chat.models import RetrievalDocs from danswer.configs.constants import DocumentSource @@ -44,11 +44,6 @@ class ChatSessionCreationRequest(BaseModel): description: str | None = None -class HelperResponse(BaseModel): - values: dict[str, str] - details: list[str] | None = None - - class CreateChatSessionID(BaseModel): chat_session_id: int @@ -59,16 +54,11 @@ class ChatFeedbackRequest(BaseModel): feedback_text: str | None = None predefined_feedback: str | None = None - @root_validator - def check_is_positive_or_feedback_text(cls: BaseModel, values: dict) -> dict: - is_positive, feedback_text = values.get("is_positive"), values.get( - "feedback_text" - ) - - if is_positive is None and feedback_text is None: + @model_validator(mode="after") + def check_is_positive_or_feedback_text(self) -> "ChatFeedbackRequest": + if self.is_positive is None and self.feedback_text is None: raise ValueError("Empty feedback received.") - - return values + return self """ @@ -90,7 +80,7 @@ class CreateChatMessageRequest(ChunkContext): parent_message_id: int | None # New message contents message: str - # file's that we should attach to this message + # Files that we should attach to this message file_descriptors: list[FileDescriptor] # If no prompt provided, uses the largest prompt of the chat session # but really this should be explicitly specified, only in the simplified APIs is this inferred @@ -103,7 +93,11 @@ class CreateChatMessageRequest(ChunkContext): # will disable Query Rewording if specified query_override: str | None = None + # enables additional handling to ensure that we regenerate with a given user message ID + regenerate: bool | None = None + # allows the caller to override the Persona / Prompt + # these do not persist in the chat thread details llm_override: LLMOverride | None = None prompt_override: PromptOverride | None = None @@ -113,18 +107,13 @@ class CreateChatMessageRequest(ChunkContext): # used for seeded chats to kick off the generation of an AI answer use_existing_user_message: bool = False - @root_validator - def check_search_doc_ids_or_retrieval_options(cls: BaseModel, values: dict) -> dict: - search_doc_ids, retrieval_options = values.get("search_doc_ids"), values.get( - "retrieval_options" - ) - - if search_doc_ids is None and retrieval_options is None: + @model_validator(mode="after") + def check_search_doc_ids_or_retrieval_options(self) -> "CreateChatMessageRequest": + if self.search_doc_ids is None and self.retrieval_options is None: raise ValueError( "Either search_doc_ids or retrieval_options must be provided, but not both or neither." 
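# --- Minimal sketch of the Pydantic v1 -> v2 validator migration pattern applied
# throughout this diff; illustrative only, with hypothetical field names.
from pydantic import BaseModel, model_validator

class ExampleRequest(BaseModel):
    search_doc_ids: list[int] | None = None
    retrieval_options: dict | None = None

    # v1: @root_validator received a `values` dict and returned it.
    # v2: @model_validator(mode="after") runs on the constructed model and returns `self`.
    @model_validator(mode="after")
    def check_one_is_provided(self) -> "ExampleRequest":
        if self.search_doc_ids is None and self.retrieval_options is None:
            raise ValueError("Provide either search_doc_ids or retrieval_options.")
        return self

# ExampleRequest(search_doc_ids=[1, 2])  # ok
# ExampleRequest()                       # raises a ValidationError wrapping the ValueError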
) - - return values + return self class ChatMessageIdentifier(BaseModel): @@ -150,7 +139,7 @@ class ChatSessionDetails(BaseModel): persona_id: int time_created: str shared_status: ChatSessionSharedStatus - folder_id: int | None + folder_id: int | None = None current_alternate_model: str | None = None @@ -163,39 +152,47 @@ class SearchFeedbackRequest(BaseModel): document_id: str document_rank: int click: bool - search_feedback: SearchFeedbackType | None + search_feedback: SearchFeedbackType | None = None - @root_validator - def check_click_or_search_feedback(cls: BaseModel, values: dict) -> dict: - click, feedback = values.get("click"), values.get("search_feedback") + @model_validator(mode="after") + def check_click_or_search_feedback(self) -> "SearchFeedbackRequest": + click, feedback = self.click, self.search_feedback if click is False and feedback is None: raise ValueError("Empty feedback received.") - - return values + return self class ChatMessageDetail(BaseModel): message_id: int - parent_message: int | None - latest_child_message: int | None + parent_message: int | None = None + latest_child_message: int | None = None message: str - rephrased_query: str | None - context_docs: RetrievalDocs | None + rephrased_query: str | None = None + context_docs: RetrievalDocs | None = None message_type: MessageType time_sent: datetime - alternate_assistant_id: str | None + overridden_model: str | None + alternate_assistant_id: int | None = None # Dict mapping citation number to db_doc_id - citations: dict[int, int] | None + chat_session_id: int | None = None + citations: dict[int, int] | None = None files: list[FileDescriptor] tool_calls: list[ToolCallFinalResult] - def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore - initial_dict = super().dict(*args, **kwargs) # type: ignore + def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + initial_dict = super().model_dump(mode="json", *args, **kwargs) # type: ignore initial_dict["time_sent"] = self.time_sent.isoformat() return initial_dict +class SearchSessionDetailResponse(BaseModel): + search_session_id: int + description: str + documents: list[SearchDoc] + messages: list[ChatMessageDetail] + + class ChatSessionDetailResponse(BaseModel): chat_session_id: int description: str @@ -219,7 +216,3 @@ class AdminSearchRequest(BaseModel): class AdminSearchResponse(BaseModel): documents: list[SearchDoc] - - -class DanswerAnswer(BaseModel): - answer: str | None diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index 43192211b79..704b16d5eaa 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -4,12 +4,20 @@ from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session -from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.configs.constants import DocumentSource -from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.configs.constants import MessageType +from danswer.db.chat import get_chat_messages_by_session +from danswer.db.chat import get_chat_session_by_id +from danswer.db.chat import get_chat_sessions_by_user +from danswer.db.chat import get_first_messages_for_chat_sessions +from danswer.db.chat import get_search_docs_for_chat_message +from danswer.db.chat import 
translate_db_message_to_chat_message_detail +from danswer.db.chat import translate_db_search_doc_to_server_search_doc from danswer.db.engine import get_session from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings from danswer.db.tag import get_tags_by_value_prefix_for_source_types from danswer.document_index.factory import get_default_document_index from danswer.document_index.vespa.index import VespaIndex @@ -18,14 +26,15 @@ from danswer.search.models import IndexFilters from danswer.search.models import SearchDoc from danswer.search.preprocessing.access_filters import build_access_filters_for_user -from danswer.search.preprocessing.danswer_helper import recommend_search_flow from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.secondary_llm_flows.query_validation import get_query_answerability from danswer.secondary_llm_flows.query_validation import stream_query_answerability from danswer.server.query_and_chat.models import AdminSearchRequest from danswer.server.query_and_chat.models import AdminSearchResponse -from danswer.server.query_and_chat.models import HelperResponse +from danswer.server.query_and_chat.models import ChatSessionDetails +from danswer.server.query_and_chat.models import ChatSessionsResponse from danswer.server.query_and_chat.models import QueryValidationResponse +from danswer.server.query_and_chat.models import SearchSessionDetailResponse from danswer.server.query_and_chat.models import SimpleQueryRequest from danswer.server.query_and_chat.models import SourceTag from danswer.server.query_and_chat.models import TagResponse @@ -41,12 +50,11 @@ @admin_router.post("/search") def admin_search( question: AdminSearchRequest, - user: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> AdminSearchResponse: query = question.query - logger.info(f"Received admin search query: {query}") - + logger.notice(f"Received admin search query: {query}") user_acl_filters = build_access_filters_for_user(user, db_session) final_filters = IndexFilters( source_type=question.filters.source_type, @@ -55,19 +63,15 @@ def admin_search( tags=question.filters.tags, access_control_list=user_acl_filters, ) - - embedding_model = get_current_db_embedding_model(db_session) - + search_settings = get_current_search_settings(db_session) document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None + primary_index_name=search_settings.index_name, secondary_index_name=None ) - if not isinstance(document_index, VespaIndex): raise HTTPException( status_code=400, detail="Cannot use admin-search when using a non-Vespa document index", ) - matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters) documents = chunks_or_sections_to_search_docs(matching_chunks) @@ -88,6 +92,7 @@ def get_tags( # If this is empty or None, then tags for all sources are considered sources: list[DocumentSource] | None = None, allow_prefix: bool = True, # This is currently the only option + limit: int = 50, _: User = Depends(current_user), db_session: Session = Depends(get_session), ) -> TagResponse: @@ -95,8 +100,10 @@ def get_tags( raise NotImplementedError("Cannot disable prefix match for now") db_tags = get_tags_by_value_prefix_for_source_types( + tag_key_prefix=match_pattern, tag_value_prefix=match_pattern, sources=sources, + limit=limit, db_session=db_session, ) server_tags 
= [ @@ -108,19 +115,6 @@ def get_tags( return TagResponse(tags=server_tags) -@basic_router.post("/search-intent") -def get_search_type( - simple_query: SimpleQueryRequest, - _: User = Depends(current_user), - db_session: Session = Depends(get_session), -) -> HelperResponse: - logger.info(f"Calculating intent for {simple_query.query}") - embedding_model = get_current_db_embedding_model(db_session) - return recommend_search_flow( - simple_query.query, model_name=embedding_model.model_name - ) - - @basic_router.post("/query-validation") def query_validation( simple_query: SimpleQueryRequest, _: User = Depends(current_user) @@ -128,11 +122,108 @@ def query_validation( # Note if weak model prompt is chosen, this check does not occur and will simply return that # the query is valid, this is because weaker models cannot really handle this task well. # Additionally, some weak model servers cannot handle concurrent inferences. - logger.info(f"Validating query: {simple_query.query}") + logger.notice(f"Validating query: {simple_query.query}") reasoning, answerable = get_query_answerability(simple_query.query) return QueryValidationResponse(reasoning=reasoning, answerable=answerable) +@basic_router.get("/user-searches") +def get_user_search_sessions( + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> ChatSessionsResponse: + user_id = user.id if user is not None else None + + try: + search_sessions = get_chat_sessions_by_user( + user_id=user_id, deleted=False, db_session=db_session, only_one_shot=True + ) + except ValueError: + raise HTTPException( + status_code=404, detail="Chat session does not exist or has been deleted" + ) + + search_session_ids = [chat.id for chat in search_sessions] + first_messages = get_first_messages_for_chat_sessions( + search_session_ids, db_session + ) + first_messages_dict = dict(first_messages) + + response = ChatSessionsResponse( + sessions=[ + ChatSessionDetails( + id=search.id, + name=first_messages_dict.get(search.id, search.description), + persona_id=search.persona_id, + time_created=search.time_created.isoformat(), + shared_status=search.shared_status, + folder_id=search.folder_id, + current_alternate_model=search.current_alternate_model, + ) + for search in search_sessions + ] + ) + return response + + +@basic_router.get("/search-session/{session_id}") +def get_search_session( + session_id: int, + is_shared: bool = False, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> SearchSessionDetailResponse: + user_id = user.id if user is not None else None + + try: + search_session = get_chat_session_by_id( + chat_session_id=session_id, + user_id=user_id, + db_session=db_session, + is_shared=is_shared, + ) + except ValueError: + raise ValueError("Search session does not exist or has been deleted") + + session_messages = get_chat_messages_by_session( + chat_session_id=session_id, + user_id=user_id, + db_session=db_session, + # we already did a permission check above with the call to + # `get_chat_session_by_id`, so we can skip it here + skip_permission_check=True, + # we need the tool call objs anyways, so just fetch them in a single call + prefetch_tool_calls=True, + ) + docs_response: list[SearchDoc] = [] + for message in session_messages: + if ( + message.message_type == MessageType.ASSISTANT + or message.message_type == MessageType.SYSTEM + ): + docs = get_search_docs_for_chat_message( + db_session=db_session, chat_message_id=message.id + ) + for doc in docs: + server_doc = 
translate_db_search_doc_to_server_search_doc(doc) + docs_response.append(server_doc) + + response = SearchSessionDetailResponse( + search_session_id=session_id, + description=search_session.description, + documents=docs_response, + messages=[ + translate_db_message_to_chat_message_detail( + msg, remove_doc_content=is_shared # if shared, don't leak doc content + ) + for msg in session_messages + ], + ) + return response + + +# NOTE No longer used, after search/chat redesign. +# No search responses are answered with a conversational generative AI response @basic_router.post("/stream-query-validation") def stream_query_validation( simple_query: SimpleQueryRequest, _: User = Depends(current_user) @@ -140,7 +231,7 @@ def stream_query_validation( # Note if weak model prompt is chosen, this check does not occur and will simply return that # the query is valid, this is because weaker models cannot really handle this task well. # Additionally, some weak model servers cannot handle concurrent inferences. - logger.info(f"Validating query: {simple_query.query}") + logger.notice(f"Validating query: {simple_query.query}") return StreamingResponse( stream_query_answerability(simple_query.query), media_type="application/json" ) @@ -153,7 +244,9 @@ def get_answer_with_quote( _: None = Depends(check_token_rate_limits), ) -> StreamingResponse: query = query_request.messages[0].message - logger.info(f"Received query for one shot answer with quotes: {query}") + + logger.notice(f"Received query for one shot answer with quotes: {query}") + packets = stream_search_answer( query_req=query_request, user=user, diff --git a/backend/danswer/server/query_and_chat/token_limit.py b/backend/danswer/server/query_and_chat/token_limit.py index b44eec3a64c..3f5d76bac7f 100644 --- a/backend/danswer/server/query_and_chat/token_limit.py +++ b/backend/danswer/server/query_and_chat/token_limit.py @@ -123,7 +123,7 @@ def _is_rate_limited( def any_rate_limit_exists() -> bool: """Checks if any rate limit exists in the database. 
Is cached, so that if no rate limits are setup, we don't have any effect on average query latency.""" - logger.info("Checking for any rate limits...") + logger.debug("Checking for any rate limits...") with get_session_context_manager() as db_session: return ( db_session.scalar( diff --git a/backend/danswer/server/settings/api.py b/backend/danswer/server/settings/api.py index 422e268c13e..3330f6cc5ff 100644 --- a/backend/danswer/server/settings/api.py +++ b/backend/danswer/server/settings/api.py @@ -1,13 +1,35 @@ +from typing import cast + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.auth.users import current_user +from danswer.auth.users import is_user_admin +from danswer.configs.constants import KV_REINDEX_KEY +from danswer.configs.constants import NotificationType +from danswer.db.engine import get_session from danswer.db.models import User +from danswer.db.notification import create_notification +from danswer.db.notification import dismiss_all_notifications +from danswer.db.notification import dismiss_notification +from danswer.db.notification import get_notification_by_id +from danswer.db.notification import get_notifications +from danswer.db.notification import update_notification_last_shown +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.server.settings.models import Notification from danswer.server.settings.models import Settings +from danswer.server.settings.models import UserSettings from danswer.server.settings.store import load_settings from danswer.server.settings.store import store_settings +from danswer.utils.logger import setup_logger + + +logger = setup_logger() admin_router = APIRouter(prefix="/admin/settings") @@ -26,5 +48,98 @@ def put_settings( @basic_router.get("") -def fetch_settings(_: User | None = Depends(current_user)) -> Settings: - return load_settings() +def fetch_settings( + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserSettings: + """Settings and notifications are stuffed into this single endpoint to reduce number of + Postgres calls""" + general_settings = load_settings() + user_notifications = get_user_notifications(user, db_session) + + try: + kv_store = get_dynamic_config_store() + needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY)) + except ConfigNotFoundError: + needs_reindexing = False + + return UserSettings( + **general_settings.model_dump(), + notifications=user_notifications, + needs_reindexing=needs_reindexing + ) + + +@basic_router.post("/notifications/{notification_id}/dismiss") +def dismiss_notification_endpoint( + notification_id: int, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + try: + notification = get_notification_by_id(notification_id, user, db_session) + except PermissionError: + raise HTTPException( + status_code=403, detail="Not authorized to dismiss this notification" + ) + except ValueError: + raise HTTPException(status_code=404, detail="Notification not found") + + dismiss_notification(notification, db_session) + + +def get_user_notifications( + user: User | None, db_session: Session +) -> list[Notification]: + """Get notifications for the user, currently the logic is very specific to the reindexing flag""" + is_admin = 
is_user_admin(user) + if not is_admin: + # Reindexing flag should only be shown to admins, basic users can't trigger it anyway + return [] + + kv_store = get_dynamic_config_store() + try: + needs_index = cast(bool, kv_store.load(KV_REINDEX_KEY)) + if not needs_index: + dismiss_all_notifications( + notif_type=NotificationType.REINDEX, db_session=db_session + ) + return [] + except ConfigNotFoundError: + # If something goes wrong and the flag is gone, better to not start a reindexing + # it's a heavyweight long running job and maybe this flag is cleaned up later + logger.warning("Could not find reindex flag") + return [] + + try: + # Need a transaction in order to prevent under-counting current notifications + db_session.begin() + + reindex_notifs = get_notifications( + user=user, notif_type=NotificationType.REINDEX, db_session=db_session + ) + + if not reindex_notifs: + notif = create_notification( + user=user, + notif_type=NotificationType.REINDEX, + db_session=db_session, + ) + db_session.flush() + db_session.commit() + return [Notification.from_model(notif)] + + if len(reindex_notifs) > 1: + logger.error("User has multiple reindex notifications") + + reindex_notif = reindex_notifs[0] + update_notification_last_shown( + notification=reindex_notif, db_session=db_session + ) + + db_session.commit() + return [Notification.from_model(reindex_notif)] + except SQLAlchemyError: + logger.exception("Error while processing notifications") + db_session.rollback() + return [] diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py index 9afacf5add2..e999e7294e9 100644 --- a/backend/danswer/server/settings/models.py +++ b/backend/danswer/server/settings/models.py @@ -1,13 +1,35 @@ +from datetime import datetime from enum import Enum from pydantic import BaseModel +from danswer.configs.constants import NotificationType +from danswer.db.models import Notification as NotificationDBModel + class PageType(str, Enum): CHAT = "chat" SEARCH = "search" +class Notification(BaseModel): + id: int + notif_type: NotificationType + dismissed: bool + last_shown: datetime + first_shown: datetime + + @classmethod + def from_model(cls, notif: NotificationDBModel) -> "Notification": + return cls( + id=notif.id, + notif_type=notif.notif_type, + dismissed=notif.dismissed, + last_shown=notif.last_shown, + first_shown=notif.first_shown, + ) + + class Settings(BaseModel): """General settings""" @@ -35,3 +57,8 @@ def check_validity(self) -> None: raise ValueError( "The default page cannot be 'search' if the search page is disabled." 
) + + +class UserSettings(Settings): + notifications: list[Notification] + needs_reindexing: bool diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py index ead1e3652a9..6f2872f40f9 100644 --- a/backend/danswer/server/settings/store.py +++ b/backend/danswer/server/settings/store.py @@ -1,23 +1,21 @@ from typing import cast +from danswer.configs.constants import KV_SETTINGS_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.server.settings.models import Settings -_SETTINGS_KEY = "danswer_settings" - - def load_settings() -> Settings: dynamic_config_store = get_dynamic_config_store() try: - settings = Settings(**cast(dict, dynamic_config_store.load(_SETTINGS_KEY))) + settings = Settings(**cast(dict, dynamic_config_store.load(KV_SETTINGS_KEY))) except ConfigNotFoundError: settings = Settings() - dynamic_config_store.store(_SETTINGS_KEY, settings.dict()) + dynamic_config_store.store(KV_SETTINGS_KEY, settings.model_dump()) return settings def store_settings(settings: Settings) -> None: - get_dynamic_config_store().store(_SETTINGS_KEY, settings.dict()) + get_dynamic_config_store().store(KV_SETTINGS_KEY, settings.model_dump()) diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py index 68fae10c061..99b2ae3bbb6 100644 --- a/backend/danswer/tools/built_in_tools.py +++ b/backend/danswer/tools/built_in_tools.py @@ -1,6 +1,6 @@ import os from typing import Type -from typing import TypedDict +from typing_extensions import TypedDict # noreorder from sqlalchemy import not_ from sqlalchemy import or_ @@ -77,7 +77,7 @@ def load_builtin_tools(db_session: Session) -> None: tool.name = tool_name tool.description = tool_info["description"] tool.display_name = tool_info["display_name"] - logger.info(f"Updated tool: {tool_name}") + logger.notice(f"Updated tool: {tool_name}") else: # Add new tool new_tool = ToolDBModel( @@ -87,17 +87,17 @@ def load_builtin_tools(db_session: Session) -> None: in_code_tool_id=tool_info["in_code_tool_id"], ) db_session.add(new_tool) - logger.info(f"Added new tool: {tool_name}") + logger.notice(f"Added new tool: {tool_name}") # Remove tools that are no longer in BUILT_IN_TOOLS built_in_ids = {tool_info["in_code_tool_id"] for tool_info in BUILT_IN_TOOLS} for tool_id, tool in list(in_code_tool_id_to_tool.items()): if tool_id not in built_in_ids: db_session.delete(tool) - logger.info(f"Removed tool no longer in built-in list: {tool.name}") + logger.notice(f"Removed tool no longer in built-in list: {tool.name}") db_session.commit() - logger.info("All built-in tools are loaded/verified.") + logger.notice("All built-in tools are loaded/verified.") def auto_add_search_tool_to_personas(db_session: Session) -> None: @@ -140,11 +140,11 @@ def auto_add_search_tool_to_personas(db_session: Session) -> None: for persona in personas_to_update: if search_tool not in persona.tools: persona.tools.append(search_tool) - logger.info(f"Added SearchTool to Persona ID: {persona.id}") + logger.notice(f"Added SearchTool to Persona ID: {persona.id}") # Commit changes to the database db_session.commit() - logger.info("Completed adding SearchTool to relevant Personas.") + logger.notice("Completed adding SearchTool to relevant Personas.") _built_in_tools_cache: dict[int, Type[Tool]] | None = None diff --git a/backend/danswer/tools/custom/base_tool_types.py b/backend/danswer/tools/custom/base_tool_types.py new file mode 100644 
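# The settings/store.py hunk above swaps the module-private key for the shared KV_SETTINGS_KEY
# constant and moves serialization from pydantic v1's .dict() to v2's .model_dump(). A minimal
# sketch of the same load-or-default pattern, assuming a plain dict in place of Danswer's
# dynamic config store and simplified placeholder fields on Settings:

from pydantic import BaseModel

KV_SETTINGS_KEY = "danswer_settings"
_kv_store: dict[str, dict] = {}  # stand-in for get_dynamic_config_store()


class Settings(BaseModel):
    search_page_enabled: bool = True  # placeholder field for illustration
    default_page: str = "search"  # placeholder field for illustration


def load_settings() -> Settings:
    raw = _kv_store.get(KV_SETTINGS_KEY)
    if raw is None:
        # first run: persist the defaults so later loads are consistent
        settings = Settings()
        _kv_store[KV_SETTINGS_KEY] = settings.model_dump()  # v2 replacement for .dict()
        return settings
    return Settings(**raw)


def store_settings(settings: Settings) -> None:
    _kv_store[KV_SETTINGS_KEY] = settings.model_dump()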
index 00000000000..7bef9a572c5 --- /dev/null +++ b/backend/danswer/tools/custom/base_tool_types.py @@ -0,0 +1,2 @@ +# should really be `JSON_ro`, but this causes issues with pydantic +ToolResultType = dict | list | str | int | float | bool diff --git a/backend/danswer/tools/custom/custom_tool.py b/backend/danswer/tools/custom/custom_tool.py index 84f10c3ec05..f7cbf236f2b 100644 --- a/backend/danswer/tools/custom/custom_tool.py +++ b/backend/danswer/tools/custom/custom_tool.py @@ -11,6 +11,7 @@ from danswer.dynamic_configs.interface import JSON_ro from danswer.llm.answering.models import PreviousMessage from danswer.llm.interfaces import LLM +from danswer.tools.custom.base_tool_types import ToolResultType from danswer.tools.custom.custom_tool_prompts import ( SHOULD_USE_CUSTOM_TOOL_SYSTEM_PROMPT, ) @@ -34,7 +35,7 @@ class CustomToolCallSummary(BaseModel): tool_name: str - tool_result: dict + tool_result: ToolResultType class CustomTool(Tool): diff --git a/backend/danswer/tools/custom/openapi_parsing.py b/backend/danswer/tools/custom/openapi_parsing.py index 40ed5544d8b..b40ea170ceb 100644 --- a/backend/danswer/tools/custom/openapi_parsing.py +++ b/backend/danswer/tools/custom/openapi_parsing.py @@ -1,7 +1,7 @@ from typing import Any from typing import cast -from openai import BaseModel +from pydantic import BaseModel REQUEST_BODY = "requestBody" diff --git a/backend/danswer/tools/force.py b/backend/danswer/tools/force.py index 53f7997d6ed..445175f3e72 100644 --- a/backend/danswer/tools/force.py +++ b/backend/danswer/tools/force.py @@ -1,13 +1,15 @@ from typing import Any -from langchain_core.messages import AIMessage -from langchain_core.messages import BaseMessage from pydantic import BaseModel from danswer.tools.tool import Tool class ForceUseTool(BaseModel): + # Could be not a forced usage of the tool but still have args, in which case + # if the tool is called, then those args are applied instead of what the LLM + # wanted to call it with + force_use: bool tool_name: str args: dict[str, Any] | None = None @@ -16,25 +18,10 @@ def build_openai_tool_choice_dict(self) -> dict[str, Any]: return {"type": "function", "function": {"name": self.tool_name}} -def modify_message_chain_for_force_use_tool( - messages: list[BaseMessage], force_use_tool: ForceUseTool | None = None -) -> list[BaseMessage]: - """NOTE: modifies `messages` in place.""" - if not force_use_tool: - return messages - - for message in messages: - if isinstance(message, AIMessage) and message.tool_calls: - for tool_call in message.tool_calls: - tool_call["args"] = force_use_tool.args or {} - - return messages - - def filter_tools_for_force_tool_use( - tools: list[Tool], force_use_tool: ForceUseTool | None = None + tools: list[Tool], force_use_tool: ForceUseTool ) -> list[Tool]: - if not force_use_tool: + if not force_use_tool.force_use: return tools return [tool for tool in tools if tool.name == force_use_tool.tool_name] diff --git a/backend/danswer/tools/images/image_generation_tool.py b/backend/danswer/tools/images/image_generation_tool.py index ed145a55cf2..fe839b7d68c 100644 --- a/backend/danswer/tools/images/image_generation_tool.py +++ b/backend/danswer/tools/images/image_generation_tool.py @@ -1,5 +1,6 @@ import json from collections.abc import Generator +from enum import Enum from typing import Any from typing import cast @@ -20,6 +21,7 @@ from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + logger = setup_logger() @@ -54,6 +56,12 @@ class 
ImageGenerationResponse(BaseModel): url: str +class ImageShape(str, Enum): + SQUARE = "square" + PORTRAIT = "portrait" + LANDSCAPE = "landscape" + + class ImageGenerationTool(Tool): _NAME = "run_image_generation" _DESCRIPTION = "Generate an image from a prompt." @@ -102,6 +110,11 @@ def tool_definition(self) -> dict: "type": "string", "description": "Prompt used to generate the image", }, + "shape": { + "type": "string", + "description": "Optional. Image shape: 'square', 'portrait', or 'landscape'", + "enum": [shape.value for shape in ImageShape], + }, }, "required": ["prompt"], }, @@ -156,34 +169,79 @@ def build_tool_message_content( for image_generation in image_generations ] ), - img_urls=[image_generation.url for image_generation in image_generations], + # NOTE: we can't pass in the image URLs here, since OpenAI doesn't allow + # Tool messages to contain images + # img_urls=[image_generation.url for image_generation in image_generations], ) - def _generate_image(self, prompt: str) -> ImageGenerationResponse: - response = image_generation( - prompt=prompt, - model=self.model, - api_key=self.api_key, - # need to pass in None rather than empty str - api_base=self.api_base or None, - api_version=self.api_version or None, - n=1, - extra_headers=build_llm_extra_headers(self.additional_headers), - ) - return ImageGenerationResponse( - revised_prompt=response.data[0]["revised_prompt"], - url=response.data[0]["url"], - ) + def _generate_image( + self, prompt: str, shape: ImageShape + ) -> ImageGenerationResponse: + if shape == ImageShape.LANDSCAPE: + size = "1792x1024" + elif shape == ImageShape.PORTRAIT: + size = "1024x1792" + else: + size = "1024x1024" + + try: + response = image_generation( + prompt=prompt, + model=self.model, + api_key=self.api_key, + # need to pass in None rather than empty str + api_base=self.api_base or None, + api_version=self.api_version or None, + size=size, + n=1, + extra_headers=build_llm_extra_headers(self.additional_headers), + ) + return ImageGenerationResponse( + revised_prompt=response.data[0]["revised_prompt"], + url=response.data[0]["url"], + ) + except Exception as e: + logger.debug(f"Error occurred during image generation: {e}") + + error_message = str(e) + if "OpenAIException" in str(type(e)): + if ( + "Your request was rejected as a result of our safety system" + in error_message + ): + raise ValueError( + "The image generation request was rejected due to OpenAI's content policy. Please try a different prompt." + ) + elif "Invalid image URL" in error_message: + raise ValueError("Invalid image URL provided for image generation.") + elif "invalid_request_error" in error_message: + raise ValueError( + "Invalid request for image generation. Please check your input." + ) + + raise ValueError( + "An error occurred during image generation. Please try again later."
+ ) def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: prompt = cast(str, kwargs["prompt"]) + shape = ImageShape(kwargs.get("shape", ImageShape.SQUARE)) # dalle3 only supports 1 image at a time, which is why we have to # parallelize this via threading results = cast( list[ImageGenerationResponse], run_functions_tuples_in_parallel( - [(self._generate_image, (prompt,)) for _ in range(self.num_imgs)] + [ + ( + self._generate_image, + ( + prompt, + shape, + ), + ) + for _ in range(self.num_imgs) + ] ), ) yield ToolResponse( @@ -196,6 +254,6 @@ def final_result(self, *args: ToolResponse) -> JSON_ro: list[ImageGenerationResponse], args[0].response ) return [ - image_generation_response.dict() + image_generation_response.model_dump() for image_generation_response in image_generation_responses ] diff --git a/backend/danswer/tools/images/prompt.py b/backend/danswer/tools/images/prompt.py index dee28b49c84..bb729bfcd1c 100644 --- a/backend/danswer/tools/images/prompt.py +++ b/backend/danswer/tools/images/prompt.py @@ -3,31 +3,19 @@ from danswer.llm.utils import build_content_with_imgs -NON_TOOL_CALLING_PROMPT = """ -You have just created the attached images in response to the following query: "{{query}}". +IMG_GENERATION_SUMMARY_PROMPT = """ +You have just created the attached images in response to the following query: "{query}". -Can you please summarize them in a sentence or two? -""" - -TOOL_CALLING_PROMPT = """ -Can you please summarize the two images you generate in a sentence or two? +Can you please summarize them in a sentence or two? Do NOT include image urls or bulleted lists. """ def build_image_generation_user_prompt( query: str, img_urls: list[str] | None = None ) -> HumanMessage: - if img_urls: - return HumanMessage( - content=build_content_with_imgs( - message=NON_TOOL_CALLING_PROMPT.format(query=query).strip(), - img_urls=img_urls, - ) - ) - return HumanMessage( content=build_content_with_imgs( - message=TOOL_CALLING_PROMPT.strip(), + message=IMG_GENERATION_SUMMARY_PROMPT.format(query=query).strip(), img_urls=img_urls, ) ) diff --git a/backend/danswer/tools/internet_search/internet_search_tool.py b/backend/danswer/tools/internet_search/internet_search_tool.py index 0f92deadd04..2640afcdf83 100644 --- a/backend/danswer/tools/internet_search/internet_search_tool.py +++ b/backend/danswer/tools/internet_search/internet_search_tool.py @@ -187,7 +187,7 @@ def build_tool_message_content( self, *args: ToolResponse ) -> str | list[str | dict[str, Any]]: search_response = cast(InternetSearchResponse, args[0].response) - return json.dumps(search_response.dict()) + return json.dumps(search_response.model_dump()) def _perform_search(self, query: str) -> InternetSearchResponse: response = self.client.get( @@ -230,4 +230,4 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: def final_result(self, *args: ToolResponse) -> JSON_ro: search_response = cast(InternetSearchResponse, args[0].response) - return search_response.dict() + return search_response.model_dump() diff --git a/backend/danswer/tools/message.py b/backend/danswer/tools/message.py index cdf86a23b05..b0259c29b2a 100644 --- a/backend/danswer/tools/message.py +++ b/backend/danswer/tools/message.py @@ -4,9 +4,11 @@ from langchain_core.messages.ai import AIMessage from langchain_core.messages.tool import ToolCall from langchain_core.messages.tool import ToolMessage -from pydantic import BaseModel +from pydantic.v1 import BaseModel as BaseModel__v1 -from danswer.llm.utils import get_default_llm_tokenizer 
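# The image_generation_tool.py changes above add an ImageShape argument that is mapped to a
# DALL-E 3 size string and fan the per-image requests out across threads (the API returns one
# image per call). A rough standalone sketch of that flow, assuming a generic
# generate(prompt, size) callable in place of litellm's image_generation helper:

from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from typing import Any, Callable


class ImageShape(str, Enum):
    SQUARE = "square"
    PORTRAIT = "portrait"
    LANDSCAPE = "landscape"


_SHAPE_TO_SIZE = {
    ImageShape.LANDSCAPE: "1792x1024",
    ImageShape.PORTRAIT: "1024x1792",
    ImageShape.SQUARE: "1024x1024",
}


def generate_images(
    generate: Callable[[str, str], Any], prompt: str, shape: ImageShape, num_imgs: int
) -> list[Any]:
    size = _SHAPE_TO_SIZE.get(shape, "1024x1024")
    # one request per image, run concurrently since each call yields a single image
    with ThreadPoolExecutor(max_workers=num_imgs) as executor:
        futures = [executor.submit(generate, prompt, size) for _ in range(num_imgs)]
        return [future.result() for future in futures]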
+from danswer.natural_language_processing.utils import BaseTokenizer + +# Langchain has their own version of pydantic which is version 1 def build_tool_message( @@ -19,14 +21,14 @@ def build_tool_message( ) -class ToolCallSummary(BaseModel): +class ToolCallSummary(BaseModel__v1): tool_call_request: AIMessage tool_call_result: ToolMessage -def tool_call_tokens(tool_call_summary: ToolCallSummary) -> int: - llm_tokenizer = get_default_llm_tokenizer() - +def tool_call_tokens( + tool_call_summary: ToolCallSummary, llm_tokenizer: BaseTokenizer +) -> int: request_tokens = len( llm_tokenizer.encode( json.dumps(tool_call_summary.tool_call_request.tool_calls[0]["args"]) diff --git a/backend/danswer/tools/models.py b/backend/danswer/tools/models.py index 53940dcea49..052e4293a53 100644 --- a/backend/danswer/tools/models.py +++ b/backend/danswer/tools/models.py @@ -1,12 +1,12 @@ from typing import Any from pydantic import BaseModel -from pydantic import root_validator +from pydantic import model_validator class ToolResponse(BaseModel): id: str | None = None - response: Any + response: Any = None class ToolCallKickoff(BaseModel): @@ -19,12 +19,10 @@ class ToolRunnerResponse(BaseModel): tool_response: ToolResponse | None = None tool_message_content: str | list[str | dict[str, Any]] | None = None - @root_validator - def validate_tool_runner_response( - cls, values: dict[str, ToolResponse | str] - ) -> dict[str, ToolResponse | str]: + @model_validator(mode="after") + def validate_tool_runner_response(self) -> "ToolRunnerResponse": fields = ["tool_response", "tool_message_content", "tool_run_kickoff"] - provided = sum(1 for field in fields if values.get(field) is not None) + provided = sum(1 for field in fields if getattr(self, field) is not None) if provided != 1: raise ValueError( @@ -32,8 +30,10 @@ def validate_tool_runner_response( "or 'tool_run_kickoff' must be provided" ) - return values + return self class ToolCallFinalResult(ToolCallKickoff): - tool_result: Any # we would like to use JSON_ro, but can't due to its recursive nature + tool_result: Any = ( + None # we would like to use JSON_ro, but can't due to its recursive nature + ) diff --git a/backend/danswer/tools/search/search_tool.py b/backend/danswer/tools/search/search_tool.py index 44c5001d6cf..13d3a304b06 100644 --- a/backend/danswer/tools/search/search_tool.py +++ b/backend/danswer/tools/search/search_tool.py @@ -10,14 +10,22 @@ from danswer.chat.models import DanswerContext from danswer.chat.models import DanswerContexts from danswer.chat.models import LlmDoc +from danswer.chat.models import SectionRelevancePiece +from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE +from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW +from danswer.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS from danswer.db.models import Persona from danswer.db.models import User from danswer.dynamic_configs.interface import JSON_ro +from danswer.llm.answering.models import ContextualPruningConfig from danswer.llm.answering.models import DocumentPruningConfig from danswer.llm.answering.models import PreviousMessage from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.citations_prompt import compute_max_llm_input_tokens from danswer.llm.answering.prune_and_merge import prune_and_merge_sections +from danswer.llm.answering.prune_and_merge import prune_sections from danswer.llm.interfaces import LLM +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import QueryFlow from 
danswer.search.enums import SearchType from danswer.search.models import IndexFilters @@ -30,11 +38,15 @@ from danswer.tools.search.search_utils import llm_doc_to_dict from danswer.tools.tool import Tool from danswer.tools.tool import ToolResponse +from danswer.utils.logger import setup_logger + +logger = setup_logger() SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary" SEARCH_DOC_CONTENT_ID = "search_doc_content" SECTION_RELEVANCE_LIST_ID = "section_relevance_list" FINAL_CONTEXT_DOCUMENTS = "final_context_documents" +SEARCH_EVALUATION_ID = "llm_doc_eval" class SearchResponseSummary(BaseModel): @@ -73,11 +85,12 @@ def __init__( llm: LLM, fast_llm: LLM, pruning_config: DocumentPruningConfig, + evaluation_type: LLMEvaluationType, # if specified, will not actually run a search and will instead return these # sections. Used when the user selects specific docs to talk to selected_sections: list[InferenceSection] | None = None, - chunks_above: int = 0, - chunks_below: int = 0, + chunks_above: int | None = None, + chunks_below: int | None = None, full_doc: bool = False, bypass_acl: bool = False, ) -> None: @@ -87,16 +100,48 @@ def __init__( self.prompt_config = prompt_config self.llm = llm self.fast_llm = fast_llm - self.pruning_config = pruning_config + self.evaluation_type = evaluation_type self.selected_sections = selected_sections - self.chunks_above = chunks_above - self.chunks_below = chunks_below self.full_doc = full_doc self.bypass_acl = bypass_acl self.db_session = db_session + self.chunks_above = ( + chunks_above + if chunks_above is not None + else ( + persona.chunks_above + if persona.chunks_above is not None + else CONTEXT_CHUNKS_ABOVE + ) + ) + self.chunks_below = ( + chunks_below + if chunks_below is not None + else ( + persona.chunks_below + if persona.chunks_below is not None + else CONTEXT_CHUNKS_BELOW + ) + ) + + # For small context models, don't include additional surrounding context + # The 3 here for at least minimum 1 above, 1 below and 1 for the middle chunk + max_llm_tokens = compute_max_llm_input_tokens(self.llm.config) + if max_llm_tokens < 3 * GEN_AI_MODEL_FALLBACK_MAX_TOKENS: + self.chunks_above = 0 + self.chunks_below = 0 + + num_chunk_multiple = self.chunks_above + self.chunks_below + 1 + + self.contextual_pruning_config = ( + ContextualPruningConfig.from_doc_pruning_config( + num_chunk_multiple=num_chunk_multiple, doc_pruning_config=pruning_config + ) + ) + @property def name(self) -> str: return self._NAME @@ -172,7 +217,7 @@ def _build_response_for_specified_sections( self, query: str ) -> Generator[ToolResponse, None, None]: if self.selected_sections is None: - raise ValueError("sections must be specified") + raise ValueError("Sections must be specified") yield ToolResponse( id=SEARCH_RESPONSE_SUMMARY_ID, @@ -185,9 +230,20 @@ def _build_response_for_specified_sections( recency_bias_multiplier=1.0, ), ) + + # Build selected sections for specified documents + selected_sections = [ + SectionRelevancePiece( + relevant=True, + document_id=section.center_chunk.document_id, + chunk_id=section.center_chunk.chunk_id, + ) + for section in self.selected_sections + ] + yield ToolResponse( id=SECTION_RELEVANCE_LIST_ID, - response=[i for i in range(len(self.selected_sections))], + response=selected_sections, ) final_context_sections = prune_and_merge_sections( @@ -196,12 +252,14 @@ def _build_response_for_specified_sections( prompt_config=self.prompt_config, llm_config=self.llm.config, question=query, - document_pruning_config=self.pruning_config, + 
contextual_pruning_config=self.contextual_pruning_config, ) + llm_docs = [ llm_doc_from_inference_section(section) for section in final_context_sections ] + yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: @@ -214,38 +272,44 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: search_pipeline = SearchPipeline( search_request=SearchRequest( query=query, + evaluation_type=self.evaluation_type, human_selected_filters=( self.retrieval_options.filters if self.retrieval_options else None ), persona=self.persona, - offset=self.retrieval_options.offset - if self.retrieval_options - else None, + offset=( + self.retrieval_options.offset if self.retrieval_options else None + ), limit=self.retrieval_options.limit if self.retrieval_options else None, chunks_above=self.chunks_above, chunks_below=self.chunks_below, full_doc=self.full_doc, - enable_auto_detect_filters=self.retrieval_options.enable_auto_detect_filters - if self.retrieval_options - else None, + enable_auto_detect_filters=( + self.retrieval_options.enable_auto_detect_filters + if self.retrieval_options + else None + ), ), user=self.user, llm=self.llm, fast_llm=self.fast_llm, bypass_acl=self.bypass_acl, db_session=self.db_session, + prompt_config=self.prompt_config, ) + yield ToolResponse( id=SEARCH_RESPONSE_SUMMARY_ID, response=SearchResponseSummary( rephrased_query=query, - top_sections=search_pipeline.reranked_sections, + top_sections=search_pipeline.final_context_sections, predicted_flow=search_pipeline.predicted_flow, predicted_search=search_pipeline.predicted_search_type, final_filters=search_pipeline.search_query.filters, recency_bias_multiplier=search_pipeline.search_query.recency_bias_multiplier, ), ) + yield ToolResponse( id=SEARCH_DOC_CONTENT_ID, response=DanswerContexts( @@ -260,23 +324,23 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: ] ), ) + yield ToolResponse( id=SECTION_RELEVANCE_LIST_ID, - response=search_pipeline.relevant_section_indices, + response=search_pipeline.section_relevance, ) - final_context_sections = prune_and_merge_sections( - sections=search_pipeline.reranked_sections, + pruned_sections = prune_sections( + sections=search_pipeline.final_context_sections, section_relevance_list=search_pipeline.section_relevance_list, prompt_config=self.prompt_config, llm_config=self.llm.config, question=query, - document_pruning_config=self.pruning_config, + contextual_pruning_config=self.contextual_pruning_config, ) llm_docs = [ - llm_doc_from_inference_section(section) - for section in final_context_sections + llm_doc_from_inference_section(section) for section in pruned_sections ] yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) diff --git a/backend/danswer/tools/utils.py b/backend/danswer/tools/utils.py index 7fb2156df59..9e20105edef 100644 --- a/backend/danswer/tools/utils.py +++ b/backend/danswer/tools/utils.py @@ -1,12 +1,16 @@ import json -from tiktoken import Encoding - -from danswer.llm.utils import get_default_llm_tokenizer +from danswer.natural_language_processing.utils import BaseTokenizer from danswer.tools.tool import Tool -OPEN_AI_TOOL_CALLING_MODELS = {"gpt-3.5-turbo", "gpt-4-turbo", "gpt-4"} +OPEN_AI_TOOL_CALLING_MODELS = { + "gpt-3.5-turbo", + "gpt-4-turbo", + "gpt-4", + "gpt-4o", + "gpt-4o-mini", +} def explicit_tool_calling_supported(model_provider: str, model_name: str) -> bool: @@ -16,13 +20,9 @@ def explicit_tool_calling_supported(model_provider: str, 
model_name: str) -> boo return False -def compute_tool_tokens(tool: Tool, llm_tokenizer: Encoding | None = None) -> int: - if not llm_tokenizer: - llm_tokenizer = get_default_llm_tokenizer() +def compute_tool_tokens(tool: Tool, llm_tokenizer: BaseTokenizer) -> int: return len(llm_tokenizer.encode(json.dumps(tool.tool_definition()))) -def compute_all_tool_tokens( - tools: list[Tool], llm_tokenizer: Encoding | None = None -) -> int: +def compute_all_tool_tokens(tools: list[Tool], llm_tokenizer: BaseTokenizer) -> int: return sum(compute_tool_tokens(tool, llm_tokenizer) for tool in tools) diff --git a/backend/danswer/utils/acl.py b/backend/danswer/utils/acl.py deleted file mode 100644 index 5608530fac6..00000000000 --- a/backend/danswer/utils/acl.py +++ /dev/null @@ -1,69 +0,0 @@ -from threading import Thread - -from sqlalchemy import select -from sqlalchemy.orm import Session - -from danswer.access.access import get_access_for_documents -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.models import Document -from danswer.document_index.document_index_utils import get_both_index_names -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import UpdateRequest -from danswer.document_index.vespa.index import VespaIndex -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -_COMPLETED_ACL_UPDATE_KEY = "completed_acl_update" - - -def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None: - """Updates the ACL for all documents based on the state of Postgres.""" - dynamic_config_store = get_dynamic_config_store() - if should_check_if_already_done: - try: - # if entry is found, then we've already done this - dynamic_config_store.load(_COMPLETED_ACL_UPDATE_KEY) - return - except ConfigNotFoundError: - pass - - logger.info("Populating Access Control List fields in Vespa") - with Session(get_sqlalchemy_engine()) as db_session: - # for all documents, set the `access_control_list` field appropriately - # based on the state of Postgres - documents = db_session.scalars(select(Document)).all() - document_access_dict = get_access_for_documents( - db_session=db_session, - document_ids=[document.id for document in documents], - ) - - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - vespa_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - - if not isinstance(vespa_index, VespaIndex): - raise ValueError("This script is only for Vespa indexes") - - update_requests = [ - UpdateRequest( - document_ids=[document_id], - access=access, - ) - for document_id, access in document_access_dict.items() - ] - vespa_index.update(update_requests=update_requests) - - dynamic_config_store.store(_COMPLETED_ACL_UPDATE_KEY, True) - - -def set_acl_for_vespa_nonblocking(should_check_if_already_done: bool = False) -> None: - """Kick off the ACL update in a separate thread so that other work can continue.""" - Thread( - target=set_acl_for_vespa, - args=[should_check_if_already_done], - ).start() diff --git a/backend/danswer/utils/batching.py b/backend/danswer/utils/batching.py index 2ea436e1176..0200f72250a 100644 --- a/backend/danswer/utils/batching.py +++ b/backend/danswer/utils/batching.py @@ -21,10 +21,3 @@ def batch_generator( if pre_batch_yield: pre_batch_yield(batch) yield batch - - -def batch_list( - 
lst: list[T], - batch_size: int, -) -> list[list[T]]: - return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py index 38e24a36728..a7751ca3dc7 100644 --- a/backend/danswer/utils/logger.py +++ b/backend/danswer/utils/logger.py @@ -1,9 +1,16 @@ import logging import os from collections.abc import MutableMapping +from logging.handlers import RotatingFileHandler from typing import Any +from shared_configs.configs import DEV_LOGGING_ENABLED +from shared_configs.configs import LOG_FILE_NAME from shared_configs.configs import LOG_LEVEL +from shared_configs.configs import SLACK_CHANNEL_ID + + +logging.addLevelName(logging.INFO + 5, "NOTICE") class IndexAttemptSingleton: @@ -27,48 +34,87 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int: "CRITICAL": logging.CRITICAL, "ERROR": logging.ERROR, "WARNING": logging.WARNING, + "NOTICE": logging.getLevelName("NOTICE"), "INFO": logging.INFO, "DEBUG": logging.DEBUG, "NOTSET": logging.NOTSET, } - return log_level_dict.get(log_level_str.upper(), logging.INFO) - + return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE")) -class _IndexAttemptLoggingAdapter(logging.LoggerAdapter): - """This is used to globally add the index attempt id to all log messages - during indexing by workers. This is done so that the logs can be filtered - by index attempt ID to get a better idea of what happened during a specific - indexing attempt. If the index attempt ID is not set, then this adapter - is a no-op.""" +class DanswerLoggingAdapter(logging.LoggerAdapter): def process( self, msg: str, kwargs: MutableMapping[str, Any] ) -> tuple[str, MutableMapping[str, Any]]: + # If this is an indexing job, add the attempt ID to the log message + # This helps filter the logs for this specific indexing attempt_id = IndexAttemptSingleton.get_index_attempt_id() - if attempt_id is None: - return msg, kwargs + if attempt_id is not None: + msg = f"[Attempt ID: {attempt_id}] {msg}" - return f"[Attempt ID: {attempt_id}] {msg}", kwargs + # For Slack Bot, logs the channel relevant to the request + channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None + if channel_id: + msg = f"[Channel ID: {channel_id}] {msg}" + + return msg, kwargs + + def notice(self, msg: Any, *args: Any, **kwargs: Any) -> None: + # Stacklevel is set to 2 to point to the actual caller of notice instead of here + self.log( + logging.getLevelName("NOTICE"), str(msg), *args, **kwargs, stacklevel=2 + ) + + +class ColoredFormatter(logging.Formatter): + """Custom formatter to add colors to log levels.""" + + COLORS = { + "CRITICAL": "\033[91m", # Red + "ERROR": "\033[91m", # Red + "WARNING": "\033[93m", # Yellow + "NOTICE": "\033[94m", # Blue + "INFO": "\033[92m", # Green + "DEBUG": "\033[96m", # Light Green + "NOTSET": "\033[91m", # Reset + } + + def format(self, record: logging.LogRecord) -> str: + levelname = record.levelname + if levelname in self.COLORS: + prefix = self.COLORS[levelname] + suffix = "\033[0m" + formatted_message = super().format(record) + # Ensure the levelname with colon is 9 characters long + # accounts for the extra characters for coloring + level_display = f"{prefix}{levelname}{suffix}:" + return f"{level_display.ljust(18)} {formatted_message}" + return super().format(record) + + +def get_standard_formatter() -> ColoredFormatter: + """Returns a standard colored logging formatter.""" + return ColoredFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: 
%(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) def setup_logger( name: str = __name__, log_level: int = get_log_level_from_str(), - logfile_name: str | None = None, -) -> logging.LoggerAdapter: + extra: MutableMapping[str, Any] | None = None, +) -> DanswerLoggingAdapter: logger = logging.getLogger(name) # If the logger already has handlers, assume it was already configured and return it. if logger.handlers: - return _IndexAttemptLoggingAdapter(logger) + return DanswerLoggingAdapter(logger, extra=extra) logger.setLevel(log_level) - formatter = logging.Formatter( - "%(asctime)s %(filename)20s%(lineno)4s : %(message)s", - datefmt="%m/%d/%Y %I:%M:%S %p", - ) + formatter = get_standard_formatter() handler = logging.StreamHandler() handler.setLevel(log_level) @@ -76,12 +122,33 @@ def setup_logger( logger.addHandler(handler) - if logfile_name: - is_containerized = os.path.exists("/.dockerenv") - file_name_template = ( - "/var/log/{name}.log" if is_containerized else "./log/{name}.log" - ) - file_handler = logging.FileHandler(file_name_template.format(name=logfile_name)) - logger.addHandler(file_handler) - - return _IndexAttemptLoggingAdapter(logger) + uvicorn_logger = logging.getLogger("uvicorn.access") + if uvicorn_logger: + uvicorn_logger.handlers = [] + uvicorn_logger.addHandler(handler) + uvicorn_logger.setLevel(log_level) + + is_containerized = os.path.exists("/.dockerenv") + if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED): + log_levels = ["debug", "info", "notice"] + for level in log_levels: + file_name = ( + f"/var/log/{LOG_FILE_NAME}_{level}.log" + if is_containerized + else f"./log/{LOG_FILE_NAME}_{level}.log" + ) + file_handler = RotatingFileHandler( + file_name, + maxBytes=25 * 1024 * 1024, # 25 MB + backupCount=5, # Keep 5 backup files + ) + file_handler.setLevel(get_log_level_from_str(level)) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + if uvicorn_logger: + uvicorn_logger.addHandler(file_handler) + + logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # type: ignore + + return DanswerLoggingAdapter(logger, extra=extra) diff --git a/backend/danswer/utils/sitemap.py b/backend/danswer/utils/sitemap.py new file mode 100644 index 00000000000..ababbec4575 --- /dev/null +++ b/backend/danswer/utils/sitemap.py @@ -0,0 +1,39 @@ +from datetime import datetime +from urllib import robotparser + +from usp.tree import sitemap_tree_for_homepage # type: ignore + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def test_url(rp: robotparser.RobotFileParser | None, url: str) -> bool: + if not rp: + return True + else: + return rp.can_fetch("*", url) + + +def init_robots_txt(site: str) -> robotparser.RobotFileParser: + ts = datetime.now().timestamp() + robots_url = f"{site}/robots.txt?ts={ts}" + rp = robotparser.RobotFileParser() + rp.set_url(robots_url) + rp.read() + return rp + + +def list_pages_for_site(site: str) -> list[str]: + rp: robotparser.RobotFileParser | None = None + try: + rp = init_robots_txt(site) + except Exception: + logger.warning("Failed to load robots.txt") + + tree = sitemap_tree_for_homepage(site) + + pages = [page.url for page in tree.all_pages() if test_url(rp, page.url)] + pages = list(dict.fromkeys(pages)) + + return pages diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index f2dbb8915c2..80fcba65a16 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -6,10 +6,10 
@@ import requests from danswer.configs.app_configs import DISABLE_TELEMETRY +from danswer.configs.constants import KV_CUSTOMER_UUID_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError -CUSTOMER_UUID_KEY = "customer_uuid" DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" @@ -24,10 +24,10 @@ class RecordType(str, Enum): def get_or_generate_uuid() -> str: kv_store = get_dynamic_config_store() try: - return cast(str, kv_store.load(CUSTOMER_UUID_KEY)) + return cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) except ConfigNotFoundError: customer_id = str(uuid.uuid4()) - kv_store.store(CUSTOMER_UUID_KEY, customer_id, encrypt=True) + kv_store.store(KV_CUSTOMER_UUID_KEY, customer_id, encrypt=True) return customer_id diff --git a/backend/danswer/utils/threadpool_concurrency.py b/backend/danswer/utils/threadpool_concurrency.py index 463d43c1a7d..d8fc40a7d94 100644 --- a/backend/danswer/utils/threadpool_concurrency.py +++ b/backend/danswer/utils/threadpool_concurrency.py @@ -87,6 +87,7 @@ def run_functions_in_parallel( are the result_id of the FunctionCall and the values are the results of the call. """ results = {} + with ThreadPoolExecutor(max_workers=len(function_calls)) as executor: future_to_id = { executor.submit(func_call.execute): func_call.result_id diff --git a/backend/danswer/utils/timing.py b/backend/danswer/utils/timing.py index 2aa15095586..0d4eb7a14d4 100644 --- a/backend/danswer/utils/timing.py +++ b/backend/danswer/utils/timing.py @@ -29,14 +29,16 @@ def wrapped_func(*args: Any, **kwargs: Any) -> Any: start_time = time.time() user = kwargs.get("user") result = func(*args, **kwargs) - elapsed_time_str = str(time.time() - start_time) + elapsed_time = time.time() - start_time + elapsed_time_str = f"{elapsed_time:.3f}" log_name = func_name or func.__name__ args_str = f" args={args} kwargs={kwargs}" if include_args else "" final_log = f"{log_name}{args_str} took {elapsed_time_str} seconds" if debug_only: logger.debug(final_log) else: - logger.info(final_log) + # These are generally more important logs so the level is a bit higher + logger.notice(final_log) if not print_only: optional_telemetry( diff --git a/backend/danswer/utils/variable_functionality.py b/backend/danswer/utils/variable_functionality.py index 2348c3271a7..97c6592601e 100644 --- a/backend/danswer/utils/variable_functionality.py +++ b/backend/danswer/utils/variable_functionality.py @@ -25,7 +25,7 @@ def get_is_ee_version(self) -> bool: def set_is_ee_based_on_env_variable() -> None: if ENTERPRISE_EDITION_ENABLED and not global_version.get_is_ee_version(): - logger.info("Enterprise Edition enabled") + logger.notice("Enterprise Edition enabled") global_version.set_ee() @@ -37,9 +37,24 @@ def fetch_versioned_implementation(module: str, attribute: str) -> Any: module_full = f"ee.{module}" if is_ee else module try: return getattr(importlib.import_module(module_full), attribute) - except ModuleNotFoundError: - # try the non-ee version as a fallback + except ModuleNotFoundError as e: + logger.warning( + "Failed to fetch versioned implementation for %s.%s: %s", + module_full, + attribute, + e, + ) + if is_ee: + if "ee.danswer" not in str(e): + # If it's a non Danswer related import failure, this is likely because + # a dependent library has not been installed. 
Should raise this failure + # instead of letting the server start up + raise e + + # Use the MIT version as a fallback, this allows us to develop MIT + # versions independently and later add additional EE functionality + # similar to feature flagging return getattr(importlib.import_module(module), attribute) raise @@ -53,11 +68,9 @@ def fetch_versioned_implementation_with_fallback( ) -> T: try: return fetch_versioned_implementation(module, attribute) - except Exception as e: - logger.warning( - "Failed to fetch versioned implementation for %s.%s: %s", - module, - attribute, - e, - ) + except Exception: return fallback + + +def noop_fallback(*args: Any, **kwargs: Any) -> None: + pass diff --git a/backend/ee/danswer/access/access.py b/backend/ee/danswer/access/access.py index 254e76e6681..c2b05ee881f 100644 --- a/backend/ee/danswer/access/access.py +++ b/backend/ee/danswer/access/access.py @@ -7,7 +7,6 @@ from danswer.access.models import DocumentAccess from danswer.access.utils import prefix_user_group from danswer.db.models import User -from danswer.server.documents.models import ConnectorCredentialPairIdentifier from ee.danswer.db.user_group import fetch_user_groups_for_documents from ee.danswer.db.user_group import fetch_user_groups_for_user @@ -15,19 +14,16 @@ def _get_access_for_documents( document_ids: list[str], db_session: Session, - cc_pair_to_delete: ConnectorCredentialPairIdentifier | None, ) -> dict[str, DocumentAccess]: non_ee_access_dict = get_access_for_documents_without_groups( document_ids=document_ids, db_session=db_session, - cc_pair_to_delete=cc_pair_to_delete, ) user_group_info = { document_id: group_names for document_id, group_names in fetch_user_groups_for_documents( db_session=db_session, document_ids=document_ids, - cc_pair_to_delete=cc_pair_to_delete, ) } diff --git a/backend/ee/danswer/auth/api_key.py b/backend/ee/danswer/auth/api_key.py index 9f9d2bac4bf..d4f99d13891 100644 --- a/backend/ee/danswer/auth/api_key.py +++ b/backend/ee/danswer/auth/api_key.py @@ -5,6 +5,7 @@ from passlib.hash import sha256_crypt from pydantic import BaseModel +from danswer.auth.schemas import UserRole from ee.danswer.configs.app_configs import API_KEY_HASH_ROUNDS @@ -19,6 +20,7 @@ class ApiKeyDescriptor(BaseModel): api_key_display: str api_key: str | None = None # only present on initial creation api_key_name: str | None = None + api_key_role: UserRole user_id: uuid.UUID diff --git a/backend/ee/danswer/auth/users.py b/backend/ee/danswer/auth/users.py index f5f5dbd58f3..18dff6ab064 100644 --- a/backend/ee/danswer/auth/users.py +++ b/backend/ee/danswer/auth/users.py @@ -19,7 +19,7 @@ def verify_auth_setting() -> None: # All the Auth flows are valid for EE version - logger.info(f"Using Auth Type: {AUTH_TYPE.value}") + logger.notice(f"Using Auth Type: {AUTH_TYPE.value}") async def optional_user_( @@ -44,7 +44,12 @@ async def optional_user_( return user -def api_key_dep(request: Request, db_session: Session = Depends(get_session)) -> User: +def api_key_dep( + request: Request, db_session: Session = Depends(get_session) +) -> User | None: + if AUTH_TYPE == AuthType.DISABLED: + return None + hashed_api_key = get_hashed_api_key_from_request(request) if not hashed_api_key: raise HTTPException(status_code=401, detail="Missing API key") diff --git a/backend/ee/danswer/background/celery/celery_app.py b/backend/ee/danswer/background/celery/celery_app.py index 2dd3ecb472e..403adbd74e1 100644 --- a/backend/ee/danswer/background/celery/celery_app.py +++ 
b/backend/ee/danswer/background/celery/celery_app.py @@ -1,12 +1,18 @@ from datetime import timedelta +from typing import Any +from celery.signals import beat_init +from celery.signals import worker_init from sqlalchemy.orm import Session from danswer.background.celery.celery_app import celery_app from danswer.background.task_utils import build_celery_task_wrapper from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME +from danswer.configs.constants import POSTGRES_CELERY_WORKER_APP_NAME from danswer.db.chat import delete_chat_sessions_older_than from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import init_sqlalchemy_engine from danswer.server.settings.store import load_settings from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import global_version @@ -95,6 +101,16 @@ def autogenerate_usage_report_task() -> None: ) +@beat_init.connect +def on_beat_init(sender: Any, **kwargs: Any) -> None: + init_sqlalchemy_engine(POSTGRES_CELERY_BEAT_APP_NAME) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + init_sqlalchemy_engine(POSTGRES_CELERY_WORKER_APP_NAME) + + ##### # Celery Beat (Periodic Tasks) Settings ##### diff --git a/backend/ee/danswer/background/permission_sync.py b/backend/ee/danswer/background/permission_sync.py index b3e8845ab3b..c14094b6042 100644 --- a/backend/ee/danswer/background/permission_sync.py +++ b/backend/ee/danswer/background/permission_sync.py @@ -14,7 +14,9 @@ from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED from danswer.configs.constants import DocumentSource +from danswer.configs.constants import POSTGRES_PERMISSIONS_APP_NAME from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import init_sqlalchemy_engine from danswer.db.models import PermissionSyncStatus from danswer.utils.logger import setup_logger from ee.danswer.configs.app_configs import NUM_PERMISSION_WORKERS @@ -213,7 +215,8 @@ def permission_loop(delay: int = 60, num_workers: int = NUM_PERMISSION_WORKERS) def update__main() -> None: - logger.info("Starting Permission Syncing Loop") + logger.notice("Starting Permission Syncing Loop") + init_sqlalchemy_engine(POSTGRES_PERMISSIONS_APP_NAME) permission_loop() diff --git a/backend/ee/danswer/db/api_key.py b/backend/ee/danswer/db/api_key.py index c2555bb1127..c38f32a0f84 100644 --- a/backend/ee/danswer/db/api_key.py +++ b/backend/ee/danswer/db/api_key.py @@ -2,9 +2,9 @@ from fastapi_users.password import PasswordHelper from sqlalchemy import select +from sqlalchemy.orm import joinedload from sqlalchemy.orm import Session -from danswer.auth.schemas import UserRole from danswer.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN from danswer.configs.constants import DANSWER_API_KEY_PREFIX from danswer.configs.constants import UNNAMED_KEY_PLACEHOLDER @@ -22,10 +22,15 @@ def is_api_key_email_address(email: str) -> bool: def fetch_api_keys(db_session: Session) -> list[ApiKeyDescriptor]: - api_keys = db_session.scalars(select(ApiKey)).all() + api_keys = ( + db_session.scalars(select(ApiKey).options(joinedload(ApiKey.user))) + .unique() + .all() + ) return [ ApiKeyDescriptor( api_key_id=api_key.id, + api_key_role=api_key.user.role, api_key_display=api_key.api_key_display, api_key_name=api_key.name, user_id=api_key.user_id, @@ -67,7 +72,7 @@ def insert_api_key( is_active=True, is_superuser=False, 
is_verified=True, - role=UserRole.BASIC, + role=api_key_args.role, ) db_session.add(api_key_user_row) @@ -83,6 +88,7 @@ def insert_api_key( db_session.commit() return ApiKeyDescriptor( api_key_id=api_key_row.id, + api_key_role=api_key_user_row.role, api_key_display=api_key_row.api_key_display, api_key=api_key, api_key_name=api_key_args.name, @@ -106,12 +112,14 @@ def update_api_key( email_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER api_key_user.email = get_api_key_fake_email(email_name, str(api_key_user.id)) + api_key_user.role = api_key_args.role db_session.commit() return ApiKeyDescriptor( api_key_id=existing_api_key.id, api_key_display=existing_api_key.api_key_display, api_key_name=api_key_args.name, + api_key_role=api_key_user.role, user_id=existing_api_key.user_id, ) @@ -122,6 +130,12 @@ def regenerate_api_key(db_session: Session, api_key_id: int) -> ApiKeyDescriptor if existing_api_key is None: raise ValueError(f"API key with id {api_key_id} does not exist") + api_key_user = db_session.scalar( + select(User).where(User.id == existing_api_key.user_id) # type: ignore + ) + if api_key_user is None: + raise RuntimeError("API Key does not have associated user.") + new_api_key = generate_api_key() existing_api_key.hashed_api_key = hash_api_key(new_api_key) existing_api_key.api_key_display = build_displayable_api_key(new_api_key) @@ -132,6 +146,7 @@ def regenerate_api_key(db_session: Session, api_key_id: int) -> ApiKeyDescriptor api_key_display=existing_api_key.api_key_display, api_key=new_api_key, api_key_name=existing_api_key.name, + api_key_role=api_key_user.role, user_id=existing_api_key.user_id, ) diff --git a/backend/ee/danswer/db/connector_credential_pair.py b/backend/ee/danswer/db/connector_credential_pair.py index a4938138592..a2172913476 100644 --- a/backend/ee/danswer/db/connector_credential_pair.py +++ b/backend/ee/danswer/db/connector_credential_pair.py @@ -1,13 +1,36 @@ +from sqlalchemy import delete from sqlalchemy.orm import Session from danswer.configs.constants import DocumentSource +from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.models import Connector from danswer.db.models import ConnectorCredentialPair +from danswer.db.models import UserGroup__ConnectorCredentialPair from danswer.utils.logger import setup_logger logger = setup_logger() +def _delete_connector_credential_pair_user_groups_relationship__no_commit( + db_session: Session, connector_id: int, credential_id: int +) -> None: + cc_pair = get_connector_credential_pair( + db_session=db_session, + connector_id=connector_id, + credential_id=credential_id, + ) + if cc_pair is None: + raise ValueError( + f"ConnectorCredentialPair with connector_id: {connector_id} " + f"and credential_id: {credential_id} not found" + ) + + stmt = delete(UserGroup__ConnectorCredentialPair).where( + UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair.id, + ) + db_session.execute(stmt) + + def get_cc_pairs_by_source( source_type: DocumentSource, db_session: Session, diff --git a/backend/ee/danswer/db/token_limit.py b/backend/ee/danswer/db/token_limit.py index 9b153811635..95dd0011853 100644 --- a/backend/ee/danswer/db/token_limit.py +++ b/backend/ee/danswer/db/token_limit.py @@ -1,16 +1,70 @@ from collections.abc import Sequence +from sqlalchemy import exists from sqlalchemy import Row +from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy.orm import aliased from sqlalchemy.orm import Session from danswer.configs.constants import TokenRateLimitScope from 
danswer.db.models import TokenRateLimit from danswer.db.models import TokenRateLimit__UserGroup +from danswer.db.models import User +from danswer.db.models import User__UserGroup from danswer.db.models import UserGroup +from danswer.db.models import UserRole from danswer.server.token_rate_limits.models import TokenRateLimitArgs +def _add_user_filters( + stmt: Select, user: User | None, get_editable: bool = True +) -> Select: + # If user is None, assume the user is an admin or auth is disabled + if user is None or user.role == UserRole.ADMIN: + return stmt + + TRLimit_UG = aliased(TokenRateLimit__UserGroup) + User__UG = aliased(User__UserGroup) + + """ + Here we select token_rate_limits by relation: + User -> User__UserGroup -> TokenRateLimit__UserGroup -> + TokenRateLimit + """ + stmt = stmt.outerjoin(TRLimit_UG).outerjoin( + User__UG, + User__UG.user_group_id == TRLimit_UG.user_group_id, + ) + + """ + Filter token_rate_limits by: + - if the user is in the user_group that owns the token_rate_limit + - if the user is not a global_curator, they must also have a curator relationship + to the user_group + - if editing is being done, we also filter out token_rate_limits that are owned by groups + that the user isn't a curator for + - if we are not editing, we show all token_rate_limits in the groups the user curates + """ + where_clause = User__UG.user_id == user.id + if user.role == UserRole.CURATOR and get_editable: + where_clause &= User__UG.is_curator == True # noqa: E712 + if get_editable: + user_groups = select(User__UG.user_group_id).where(User__UG.user_id == user.id) + if user.role == UserRole.CURATOR: + user_groups = user_groups.where( + User__UserGroup.is_curator == True # noqa: E712 + ) + where_clause &= ( + ~exists() + .where(TRLimit_UG.rate_limit_id == TokenRateLimit.id) + .where(~TRLimit_UG.user_group_id.in_(user_groups)) + .correlate(TokenRateLimit) + ) + + return stmt.where(where_clause) + + def fetch_all_user_token_rate_limits( db_session: Session, enabled_only: bool = False, @@ -48,29 +102,25 @@ def fetch_all_global_token_rate_limits( return token_rate_limits -def fetch_all_user_group_token_rate_limits( - db_session: Session, group_id: int, enabled_only: bool = False, ordered: bool = True +def fetch_user_group_token_rate_limits( + db_session: Session, + group_id: int, + user: User | None = None, + enabled_only: bool = False, + ordered: bool = True, + get_editable: bool = True, ) -> Sequence[TokenRateLimit]: - query = ( - select(TokenRateLimit) - .join( - TokenRateLimit__UserGroup, - TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id, - ) - .where( - TokenRateLimit__UserGroup.user_group_id == group_id, - TokenRateLimit.scope == TokenRateLimitScope.USER_GROUP, - ) - ) + stmt = select(TokenRateLimit) + stmt = stmt.where(User__UserGroup.user_group_id == group_id) + stmt = _add_user_filters(stmt, user, get_editable) if enabled_only: - query = query.where(TokenRateLimit.enabled.is_(True)) + stmt = stmt.where(TokenRateLimit.enabled.is_(True)) if ordered: - query = query.order_by(TokenRateLimit.created_at.desc()) + stmt = stmt.order_by(TokenRateLimit.created_at.desc()) - token_rate_limits = db_session.scalars(query).all() - return token_rate_limits + return db_session.scalars(stmt).all() def fetch_all_user_group_token_rate_limits_by_group( diff --git a/backend/ee/danswer/db/user_group.py b/backend/ee/danswer/db/user_group.py index 0451db9b633..9d172c5d716 100644 --- a/backend/ee/danswer/db/user_group.py +++ b/backend/ee/danswer/db/user_group.py @@ -2,22 +2,33 @@ from 
operator import and_ from uuid import UUID +from sqlalchemy import delete from sqlalchemy import func from sqlalchemy import select +from sqlalchemy import update from sqlalchemy.orm import Session +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import ConnectorCredentialPair +from danswer.db.models import Credential__UserGroup from danswer.db.models import Document from danswer.db.models import DocumentByConnectorCredentialPair +from danswer.db.models import LLMProvider__UserGroup from danswer.db.models import TokenRateLimit__UserGroup from danswer.db.models import User from danswer.db.models import User__UserGroup from danswer.db.models import UserGroup from danswer.db.models import UserGroup__ConnectorCredentialPair -from danswer.server.documents.models import ConnectorCredentialPairIdentifier +from danswer.db.models import UserRole +from danswer.db.users import fetch_user_by_id +from danswer.utils.logger import setup_logger +from ee.danswer.server.user_group.models import SetCuratorRequest from ee.danswer.server.user_group.models import UserGroupCreate from ee.danswer.server.user_group.models import UserGroupUpdate +logger = setup_logger() + def fetch_user_group(db_session: Session, user_group_id: int) -> UserGroup | None: stmt = select(UserGroup).where(UserGroup.id == user_group_id) @@ -34,7 +45,7 @@ def fetch_user_groups( def fetch_user_groups_for_user( - db_session: Session, user_id: UUID + db_session: Session, user_id: UUID, only_curator_groups: bool = False ) -> Sequence[UserGroup]: stmt = ( select(UserGroup) @@ -42,6 +53,8 @@ def fetch_user_groups_for_user( .join(User, User.id == User__UserGroup.user_id) # type: ignore .where(User.id == user_id) # type: ignore ) + if only_curator_groups: + stmt = stmt.where(User__UserGroup.is_curator == True) # noqa: E712 return db_session.scalars(stmt).all() @@ -89,7 +102,6 @@ def fetch_documents_for_user_group_paginated( def fetch_user_groups_for_documents( db_session: Session, document_ids: list[str], - cc_pair_to_delete: ConnectorCredentialPairIdentifier | None = None, ) -> Sequence[tuple[int, list[str]]]: stmt = ( select(Document.id, func.array_agg(UserGroup.name)) @@ -113,19 +125,12 @@ def fetch_user_groups_for_documents( .join(Document, Document.id == DocumentByConnectorCredentialPair.id) .where(Document.id.in_(document_ids)) .where(UserGroup__ConnectorCredentialPair.is_current == True) # noqa: E712 + # don't include CC pairs that are being deleted + # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them + .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING) .group_by(Document.id) ) - # pretend that the specified cc pair doesn't exist - if cc_pair_to_delete is not None: - stmt = stmt.where( - and_( - ConnectorCredentialPair.connector_id != cc_pair_to_delete.connector_id, - ConnectorCredentialPair.credential_id - != cc_pair_to_delete.credential_id, - ) - ) - return db_session.execute(stmt).all() # type: ignore @@ -184,16 +189,41 @@ def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserG def _cleanup_user__user_group_relationships__no_commit( - db_session: Session, user_group_id: int + db_session: Session, + user_group_id: int, + user_ids: list[UUID] | None = None, ) -> None: """NOTE: does not commit the transaction.""" + where_clause = User__UserGroup.user_group_id == user_group_id + if user_ids: + where_clause &= 
User__UserGroup.user_id.in_(user_ids) + user__user_group_relationships = db_session.scalars( - select(User__UserGroup).where(User__UserGroup.user_group_id == user_group_id) + select(User__UserGroup).where(where_clause) ).all() for user__user_group_relationship in user__user_group_relationships: db_session.delete(user__user_group_relationship) +def _cleanup_credential__user_group_relationships__no_commit( + db_session: Session, + user_group_id: int, +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(Credential__UserGroup).filter( + Credential__UserGroup.user_group_id == user_group_id + ).delete(synchronize_session=False) + + +def _cleanup_llm_provider__user_group_relationships__no_commit( + db_session: Session, user_group_id: int +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(LLMProvider__UserGroup).filter( + LLMProvider__UserGroup.user_group_id == user_group_id + ).delete(synchronize_session=False) + + def _mark_user_group__cc_pair_relationships_outdated__no_commit( db_session: Session, user_group_id: int ) -> None: @@ -207,8 +237,84 @@ def _mark_user_group__cc_pair_relationships_outdated__no_commit( user_group__cc_pair_relationship.is_current = False +def _validate_curator_status__no_commit( + db_session: Session, + users: list[User], +) -> None: + for user in users: + # Check if the user is a curator in any of their groups + curator_relationships = ( + db_session.query(User__UserGroup) + .filter( + User__UserGroup.user_id == user.id, + User__UserGroup.is_curator == True, # noqa: E712 + ) + .all() + ) + + if curator_relationships: + user.role = UserRole.CURATOR + elif user.role == UserRole.CURATOR: + user.role = UserRole.BASIC + db_session.add(user) + + +def remove_curator_status__no_commit(db_session: Session, user: User) -> None: + stmt = ( + update(User__UserGroup) + .where(User__UserGroup.user_id == user.id) + .values(is_curator=False) + ) + db_session.execute(stmt) + _validate_curator_status__no_commit(db_session, [user]) + + +def update_user_curator_relationship( + db_session: Session, + user_group_id: int, + set_curator_request: SetCuratorRequest, +) -> None: + user = fetch_user_by_id(db_session, set_curator_request.user_id) + if not user: + raise ValueError(f"User with id '{set_curator_request.user_id}' not found") + requested_user_groups = fetch_user_groups_for_user( + db_session=db_session, + user_id=set_curator_request.user_id, + only_curator_groups=False, + ) + + group_ids = [group.id for group in requested_user_groups] + if user_group_id not in group_ids: + raise ValueError(f"user is not in group '{user_group_id}'") + + relationship_to_update = ( + db_session.query(User__UserGroup) + .filter( + User__UserGroup.user_group_id == user_group_id, + User__UserGroup.user_id == set_curator_request.user_id, + ) + .first() + ) + + if relationship_to_update: + relationship_to_update.is_curator = set_curator_request.is_curator + else: + relationship_to_update = User__UserGroup( + user_group_id=user_group_id, + user_id=set_curator_request.user_id, + is_curator=True, + ) + db_session.add(relationship_to_update) + + _validate_curator_status__no_commit(db_session, [user]) + db_session.commit() + + def update_user_group( - db_session: Session, user_group_id: int, user_group: UserGroupUpdate + db_session: Session, + user: User | None, + user_group_id: int, + user_group_update: UserGroupUpdate, ) -> UserGroup: stmt = select(UserGroup).where(UserGroup.id == user_group_id) db_user_group = db_session.scalar(stmt) @@ -217,23 +323,35 @@ def 
update_user_group( _check_user_group_is_modifiable(db_user_group) - existing_cc_pairs = db_user_group.cc_pairs - cc_pairs_updated = set([cc_pair.id for cc_pair in existing_cc_pairs]) != set( - user_group.cc_pair_ids - ) - users_updated = set([user.id for user in db_user_group.users]) != set( - user_group.user_ids - ) + current_user_ids = set([user.id for user in db_user_group.users]) + updated_user_ids = set(user_group_update.user_ids) + added_user_ids = list(updated_user_ids - current_user_ids) + removed_user_ids = list(current_user_ids - updated_user_ids) - if users_updated: + # LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES + # ACCESS TO DIFFERENT PERMISSIONS + # if (removed_user_ids or added_user_ids) and ( + # not user or user.role != UserRole.ADMIN + # ): + # raise ValueError("Only admins can add or remove users from user groups") + + if removed_user_ids: _cleanup_user__user_group_relationships__no_commit( - db_session=db_session, user_group_id=user_group_id + db_session=db_session, + user_group_id=user_group_id, + user_ids=removed_user_ids, ) + + if added_user_ids: _add_user__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id, - user_ids=user_group.user_ids, + user_ids=added_user_ids, ) + + cc_pairs_updated = set([cc_pair.id for cc_pair in db_user_group.cc_pairs]) != set( + user_group_update.cc_pair_ids + ) if cc_pairs_updated: _mark_user_group__cc_pair_relationships_outdated__no_commit( db_session=db_session, user_group_id=user_group_id @@ -241,13 +359,17 @@ def update_user_group( _add_user_group__cc_pair_relationships__no_commit( db_session=db_session, user_group_id=db_user_group.id, - cc_pair_ids=user_group.cc_pair_ids, + cc_pair_ids=user_group_update.cc_pair_ids, ) # only needs to sync with Vespa if the cc_pairs have been updated if cc_pairs_updated: db_user_group.is_up_to_date = False + removed_users = db_session.scalars( + select(User).where(User.id.in_(removed_user_ids)) # type: ignore + ).unique() + _validate_curator_status__no_commit(db_session, list(removed_users)) db_session.commit() return db_user_group @@ -275,6 +397,9 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> _check_user_group_is_modifiable(db_user_group) + _cleanup_credential__user_group_relationships__no_commit( + db_session=db_session, user_group_id=user_group_id + ) _cleanup_user__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) @@ -316,6 +441,9 @@ def mark_user_group_as_synced(db_session: Session, user_group: UserGroup) -> Non def delete_user_group(db_session: Session, user_group: UserGroup) -> None: + _cleanup_llm_provider__user_group_relationships__no_commit( + db_session=db_session, user_group_id=user_group.id + ) _cleanup_user__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group.id ) @@ -330,3 +458,25 @@ def delete_user_group(db_session: Session, user_group: UserGroup) -> None: db_session.delete(user_group) db_session.commit() + + +def delete_user_group_cc_pair_relationship__no_commit( + cc_pair_id: int, db_session: Session +) -> None: + """Deletes all rows from UserGroup__ConnectorCredentialPair where the + connector_credential_pair_id matches the given cc_pair_id. 
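In isolation, this helper follows a guarded "bulk delete, no commit" shape; a minimal hypothetical sketch under simplified names (the model, table, and status value below are stand-ins, not the real Danswer schema):

from sqlalchemy import Column, Integer, create_engine, delete
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class PairGroupLink(Base):  # stand-in for UserGroup__ConnectorCredentialPair
    __tablename__ = "pair_group_link"
    id = Column(Integer, primary_key=True)
    cc_pair_id = Column(Integer)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

def delete_links_for_pair__no_commit(session: Session, cc_pair_id: int, status: str) -> None:
    # Guard: only allowed while the connector pair is being deleted.
    if status != "DELETING":
        raise ValueError("pair is not in the DELETING state")
    session.execute(delete(PairGroupLink).where(PairGroupLink.cc_pair_id == cc_pair_id))
    # Intentionally no commit: the caller owns the transaction.

with Session(engine) as s:
    s.add_all([PairGroupLink(cc_pair_id=7), PairGroupLink(cc_pair_id=8)])
    delete_links_for_pair__no_commit(s, cc_pair_id=7, status="DELETING")
    s.commit()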
+ + Should be used very carefully (only for connectors that are being deleted).""" + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: + raise ValueError(f"Connector Credential Pair '{cc_pair_id}' does not exist") + + if cc_pair.status != ConnectorCredentialPairStatus.DELETING: + raise ValueError( + f"Connector Credential Pair '{cc_pair_id}' is not in the DELETING state" + ) + + delete_stmt = delete(UserGroup__ConnectorCredentialPair).where( + UserGroup__ConnectorCredentialPair.cc_pair_id == cc_pair_id, + ) + db_session.execute(delete_stmt) diff --git a/backend/ee/danswer/server/analytics/api.py b/backend/ee/danswer/server/analytics/api.py index 19415e506d3..f79199323f5 100644 --- a/backend/ee/danswer/server/analytics/api.py +++ b/backend/ee/danswer/server/analytics/api.py @@ -6,9 +6,9 @@ from pydantic import BaseModel from sqlalchemy.orm import Session -import danswer.db.models as db_models from danswer.auth.users import current_admin_user from danswer.db.engine import get_session +from danswer.db.models import User from ee.danswer.db.analytics import fetch_danswerbot_analytics from ee.danswer.db.analytics import fetch_per_user_query_analytics from ee.danswer.db.analytics import fetch_query_analytics @@ -27,7 +27,7 @@ class QueryAnalyticsResponse(BaseModel): def get_query_analytics( start: datetime.datetime | None = None, end: datetime.datetime | None = None, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[QueryAnalyticsResponse]: daily_query_usage_info = fetch_query_analytics( @@ -58,7 +58,7 @@ class UserAnalyticsResponse(BaseModel): def get_user_analytics( start: datetime.datetime | None = None, end: datetime.datetime | None = None, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[UserAnalyticsResponse]: daily_query_usage_info_per_user = fetch_per_user_query_analytics( @@ -92,7 +92,7 @@ class DanswerbotAnalyticsResponse(BaseModel): def get_danswerbot_analytics( start: datetime.datetime | None = None, end: datetime.datetime | None = None, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[DanswerbotAnalyticsResponse]: daily_danswerbot_info = fetch_danswerbot_analytics( diff --git a/backend/ee/danswer/server/api_key/api.py b/backend/ee/danswer/server/api_key/api.py index d9a62d0655a..c7353f055fb 100644 --- a/backend/ee/danswer/server/api_key/api.py +++ b/backend/ee/danswer/server/api_key/api.py @@ -2,9 +2,9 @@ from fastapi import Depends from sqlalchemy.orm import Session -import danswer.db.models as db_models from danswer.auth.users import current_admin_user from danswer.db.engine import get_session +from danswer.db.models import User from ee.danswer.db.api_key import ApiKeyDescriptor from ee.danswer.db.api_key import fetch_api_keys from ee.danswer.db.api_key import insert_api_key @@ -13,12 +13,13 @@ from ee.danswer.db.api_key import update_api_key from ee.danswer.server.api_key.models import APIKeyArgs + router = APIRouter(prefix="/admin/api-key") @router.get("") def list_api_keys( - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[ApiKeyDescriptor]: return fetch_api_keys(db_session) @@ -27,7 +28,7 @@ def 
list_api_keys( @router.post("") def create_api_key( api_key_args: APIKeyArgs, - user: db_models.User | None = Depends(current_admin_user), + user: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ApiKeyDescriptor: return insert_api_key(db_session, api_key_args, user.id if user else None) @@ -36,7 +37,7 @@ def create_api_key( @router.post("/{api_key_id}/regenerate") def regenerate_existing_api_key( api_key_id: int, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ApiKeyDescriptor: return regenerate_api_key(db_session, api_key_id) @@ -46,7 +47,7 @@ def regenerate_existing_api_key( def update_existing_api_key( api_key_id: int, api_key_args: APIKeyArgs, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ApiKeyDescriptor: return update_api_key(db_session, api_key_id, api_key_args) @@ -55,7 +56,7 @@ def update_existing_api_key( @router.delete("/{api_key_id}") def delete_api_key( api_key_id: int, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> None: remove_api_key(db_session, api_key_id) diff --git a/backend/ee/danswer/server/api_key/models.py b/backend/ee/danswer/server/api_key/models.py index a26e27512e4..596d02c43a8 100644 --- a/backend/ee/danswer/server/api_key/models.py +++ b/backend/ee/danswer/server/api_key/models.py @@ -1,5 +1,8 @@ from pydantic import BaseModel +from danswer.auth.schemas import UserRole + class APIKeyArgs(BaseModel): name: str | None = None + role: UserRole = UserRole.BASIC diff --git a/backend/ee/danswer/server/auth_check.py b/backend/ee/danswer/server/auth_check.py index d0ba3ffe46c..49353abf84c 100644 --- a/backend/ee/danswer/server/auth_check.py +++ b/backend/ee/danswer/server/auth_check.py @@ -8,6 +8,7 @@ # needs to be accessible prior to user login ("/enterprise-settings", {"GET"}), ("/enterprise-settings/logo", {"GET"}), + ("/enterprise-settings/logotype", {"GET"}), ("/enterprise-settings/custom-analytics-script", {"GET"}), # oidc ("/auth/oidc/authorize", {"GET"}), diff --git a/backend/ee/danswer/server/enterprise_settings/api.py b/backend/ee/danswer/server/enterprise_settings/api.py index 85f43ca5541..736296517db 100644 --- a/backend/ee/danswer/server/enterprise_settings/api.py +++ b/backend/ee/danswer/server/enterprise_settings/api.py @@ -12,6 +12,7 @@ from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings from ee.danswer.server.enterprise_settings.store import _LOGO_FILENAME +from ee.danswer.server.enterprise_settings.store import _LOGOTYPE_FILENAME from ee.danswer.server.enterprise_settings.store import load_analytics_script from ee.danswer.server.enterprise_settings.store import load_settings from ee.danswer.server.enterprise_settings.store import store_analytics_script @@ -41,22 +42,38 @@ def fetch_settings() -> EnterpriseSettings: @admin_router.put("/logo") def put_logo( file: UploadFile, + is_logotype: bool = False, db_session: Session = Depends(get_session), _: User | None = Depends(current_admin_user), ) -> None: - upload_logo(file=file, db_session=db_session) + upload_logo(file=file, db_session=db_session, is_logotype=is_logotype) -@basic_router.get("/logo") -def fetch_logo(db_session: Session = 
Depends(get_session)) -> Response: +def fetch_logo_or_logotype(is_logotype: bool, db_session: Session) -> Response: try: file_store = get_default_file_store(db_session) - file_io = file_store.read_file(_LOGO_FILENAME, mode="b") + filename = _LOGOTYPE_FILENAME if is_logotype else _LOGO_FILENAME + file_io = file_store.read_file(filename, mode="b") # NOTE: specifying "image/jpeg" here, but it still works for pngs # TODO: do this properly return Response(content=file_io.read(), media_type="image/jpeg") except Exception: - raise HTTPException(status_code=404, detail="No logo file found") + raise HTTPException( + status_code=404, + detail=f"No {'logotype' if is_logotype else 'logo'} file found", + ) + + +@basic_router.get("/logotype") +def fetch_logotype(db_session: Session = Depends(get_session)) -> Response: + return fetch_logo_or_logotype(is_logotype=True, db_session=db_session) + + +@basic_router.get("/logo") +def fetch_logo( + is_logotype: bool = False, db_session: Session = Depends(get_session) +) -> Response: + return fetch_logo_or_logotype(is_logotype=is_logotype, db_session=db_session) @admin_router.put("/custom-analytics-script") diff --git a/backend/ee/danswer/server/enterprise_settings/models.py b/backend/ee/danswer/server/enterprise_settings/models.py index c2142c64162..c9831d87aeb 100644 --- a/backend/ee/danswer/server/enterprise_settings/models.py +++ b/backend/ee/danswer/server/enterprise_settings/models.py @@ -8,8 +8,10 @@ class EnterpriseSettings(BaseModel): application_name: str | None = None use_custom_logo: bool = False + use_custom_logotype: bool = False # custom Chat components + custom_lower_disclaimer_content: str | None = None custom_header_content: str | None = None custom_popup_header: str | None = None custom_popup_content: str | None = None diff --git a/backend/ee/danswer/server/enterprise_settings/store.py b/backend/ee/danswer/server/enterprise_settings/store.py index 99fb1cc90d6..30b72d5d2e8 100644 --- a/backend/ee/danswer/server/enterprise_settings/store.py +++ b/backend/ee/danswer/server/enterprise_settings/store.py @@ -9,6 +9,8 @@ from sqlalchemy.orm import Session from danswer.configs.constants import FileOrigin +from danswer.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY +from danswer.configs.constants import KV_ENTERPRISE_SETTINGS_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store @@ -17,7 +19,6 @@ from ee.danswer.server.enterprise_settings.models import EnterpriseSettings -_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" logger = setup_logger() @@ -25,27 +26,26 @@ def load_settings() -> EnterpriseSettings: dynamic_config_store = get_dynamic_config_store() try: settings = EnterpriseSettings( - **cast(dict, dynamic_config_store.load(_ENTERPRISE_SETTINGS_KEY)) + **cast(dict, dynamic_config_store.load(KV_ENTERPRISE_SETTINGS_KEY)) ) except ConfigNotFoundError: settings = EnterpriseSettings() - dynamic_config_store.store(_ENTERPRISE_SETTINGS_KEY, settings.dict()) + dynamic_config_store.store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump()) return settings def store_settings(settings: EnterpriseSettings) -> None: - get_dynamic_config_store().store(_ENTERPRISE_SETTINGS_KEY, settings.dict()) + get_dynamic_config_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump()) -_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" _CUSTOM_ANALYTICS_SECRET_KEY = 
os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY") def load_analytics_script() -> str | None: dynamic_config_store = get_dynamic_config_store() try: - return cast(str, dynamic_config_store.load(_CUSTOM_ANALYTICS_SCRIPT_KEY)) + return cast(str, dynamic_config_store.load(KV_CUSTOM_ANALYTICS_SCRIPT_KEY)) except ConfigNotFoundError: return None @@ -58,11 +58,12 @@ def store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> No raise ValueError("Invalid secret key") get_dynamic_config_store().store( - _CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script + KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script ) _LOGO_FILENAME = "__logo__" +_LOGOTYPE_FILENAME = "__logotype__" def is_valid_file_type(filename: str) -> bool: @@ -79,13 +80,12 @@ def guess_file_type(filename: str) -> str: def upload_logo( - db_session: Session, - file: UploadFile | str, + db_session: Session, file: UploadFile | str, is_logotype: bool = False ) -> bool: content: IO[Any] if isinstance(file, str): - logger.info(f"Uploading logo from local path {file}") + logger.notice(f"Uploading logo from local path {file}") if not os.path.isfile(file) or not is_valid_file_type(file): logger.error( "Invalid file type- only .png, .jpg, and .jpeg files are allowed" @@ -99,7 +99,7 @@ def upload_logo( file_type = guess_file_type(file) else: - logger.info("Uploading logo from uploaded file") + logger.notice("Uploading logo from uploaded file") if not file.filename or not is_valid_file_type(file.filename): raise HTTPException( status_code=400, @@ -111,7 +111,7 @@ def upload_logo( file_store = get_default_file_store(db_session) file_store.save_file( - file_name=_LOGO_FILENAME, + file_name=_LOGOTYPE_FILENAME if is_logotype else _LOGO_FILENAME, content=content, display_name=display_name, file_origin=FileOrigin.OTHER, diff --git a/backend/ee/danswer/server/query_and_chat/chat_backend.py b/backend/ee/danswer/server/query_and_chat/chat_backend.py index 1d24026003f..0d5d1987f34 100644 --- a/backend/ee/danswer/server/query_and_chat/chat_backend.py +++ b/backend/ee/danswer/server/query_and_chat/chat_backend.py @@ -8,18 +8,31 @@ from danswer.auth.users import current_user from danswer.chat.chat_utils import create_chat_chain from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError from danswer.chat.process_message import stream_chat_message_objects +from danswer.configs.constants import MessageType +from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE +from danswer.db.chat import create_chat_session +from danswer.db.chat import create_new_chat_message from danswer.db.chat import get_or_create_root_message from danswer.db.engine import get_session from danswer.db.models import User +from danswer.llm.factory import get_llms_for_persona +from danswer.llm.utils import get_max_input_tokens +from danswer.natural_language_processing.utils import get_tokenizer +from danswer.one_shot_answer.qa_utils import combine_message_thread from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails +from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.utils.logger import setup_logger from ee.danswer.server.query_and_chat.models import 
BasicCreateChatMessageRequest +from ee.danswer.server.query_and_chat.models import ( + BasicCreateChatMessageWithHistoryRequest, +) from ee.danswer.server.query_and_chat.models import ChatBasicResponse from ee.danswer.server.query_and_chat.models import SimpleDoc @@ -33,6 +46,7 @@ def translate_doc_response_to_simple_doc( ) -> list[SimpleDoc]: return [ SimpleDoc( + id=doc.document_id, semantic_identifier=doc.semantic_identifier, link=doc.link, blurb=doc.blurb, @@ -40,6 +54,7 @@ def translate_doc_response_to_simple_doc( highlight for highlight in doc.match_highlights if highlight ], source_type=doc.source_type, + metadata=doc.metadata, ) for doc in doc_response.top_documents ] @@ -58,7 +73,7 @@ def handle_simplified_chat_message( db_session: Session = Depends(get_session), ) -> ChatBasicResponse: """This is a Non-Streaming version that only gives back a minimal set of information""" - logger.info(f"Received new simple api chat message: {chat_message_req.message}") + logger.notice(f"Received new simple api chat message: {chat_message_req.message}") if not chat_message_req.message: raise HTTPException(status_code=400, detail="Empty chat message is invalid") @@ -92,8 +107,9 @@ def handle_simplified_chat_message( search_doc_ids=chat_message_req.search_doc_ids, retrieval_options=retrieval_options, query_override=chat_message_req.query_override, - chunks_above=chat_message_req.chunks_above, - chunks_below=chat_message_req.chunks_below, + # Currently only applies to search flow not chat + chunks_above=0, + chunks_below=0, full_doc=chat_message_req.full_doc, ) @@ -121,3 +137,131 @@ def handle_simplified_chat_message( response.answer_citationless = remove_answer_citations(answer) return response + + +@router.post("/send-message-simple-with-history") +def handle_send_message_simple_with_history( + req: BasicCreateChatMessageWithHistoryRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> ChatBasicResponse: + """This is a Non-Streaming version that only gives back a minimal set of information. 
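Before doing anything else, the handler validates the caller-supplied thread; a simplified, self-contained sketch of that alternating-role check (Role and validate_thread are illustrative stand-ins for MessageType and the request's message list, not the actual Danswer types):

from enum import Enum

class Role(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"

def validate_thread(roles: list[Role]) -> None:
    if not roles:
        raise ValueError("Messages cannot be zero length")
    expected = Role.USER
    for role in roles:
        if role != expected:
            raise ValueError("Roles must start with USER and alternate")
        expected = Role.ASSISTANT if expected == Role.USER else Role.USER

validate_thread([Role.USER, Role.ASSISTANT, Role.USER])  # ok: last message is the new query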
+ takes in chat history maintained by the caller + and does query rephrasing similar to answer-with-quote""" + + if len(req.messages) == 0: + raise HTTPException(status_code=400, detail="Messages cannot be zero length") + + expected_role = MessageType.USER + for msg in req.messages: + if not msg.message: + raise HTTPException( + status_code=400, detail="One or more chat messages were empty" + ) + + if msg.role != expected_role: + raise HTTPException( + status_code=400, + detail="Message roles must start and end with MessageType.USER and alternate in-between.", + ) + if expected_role == MessageType.USER: + expected_role = MessageType.ASSISTANT + else: + expected_role = MessageType.USER + + query = req.messages[-1].message + msg_history = req.messages[:-1] + + logger.notice(f"Received new simple with history chat message: {query}") + + user_id = user.id if user is not None else None + chat_session = create_chat_session( + db_session=db_session, + description="handle_send_message_simple_with_history", + user_id=user_id, + persona_id=req.persona_id, + one_shot=False, + ) + + llm, _ = get_llms_for_persona(persona=chat_session.persona) + + llm_tokenizer = get_tokenizer( + model_name=llm.config.model_name, + provider_type=llm.config.model_provider, + ) + + input_tokens = get_max_input_tokens( + model_name=llm.config.model_name, model_provider=llm.config.model_provider + ) + max_history_tokens = int(input_tokens * DANSWER_BOT_TARGET_CHUNK_PERCENTAGE) + + # Every chat Session begins with an empty root message + root_message = get_or_create_root_message( + chat_session_id=chat_session.id, db_session=db_session + ) + + chat_message = root_message + for msg in msg_history: + chat_message = create_new_chat_message( + chat_session_id=chat_session.id, + parent_message=chat_message, + prompt_id=req.prompt_id, + message=msg.message, + token_count=len(llm_tokenizer.encode(msg.message)), + message_type=msg.role, + db_session=db_session, + commit=False, + ) + db_session.commit() + + history_str = combine_message_thread( + messages=msg_history, + max_tokens=max_history_tokens, + llm_tokenizer=llm_tokenizer, + ) + + rephrased_query = req.query_override or thread_based_query_rephrase( + user_query=query, + history_str=history_str, + ) + + full_chat_msg_info = CreateChatMessageRequest( + chat_session_id=chat_session.id, + parent_message_id=chat_message.id, + message=query, + file_descriptors=[], + prompt_id=req.prompt_id, + search_doc_ids=None, + retrieval_options=req.retrieval_options, + query_override=rephrased_query, + chunks_above=0, + chunks_below=0, + full_doc=req.full_doc, + ) + + packets = stream_chat_message_objects( + new_msg_req=full_chat_msg_info, + user=user, + db_session=db_session, + ) + + response = ChatBasicResponse() + + answer = "" + for packet in packets: + if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece: + answer += packet.answer_piece + elif isinstance(packet, QADocsResponse): + response.simple_search_docs = translate_doc_response_to_simple_doc(packet) + elif isinstance(packet, StreamingError): + response.error_msg = packet.error + elif isinstance(packet, ChatMessageDetail): + response.message_id = packet.message_id + elif isinstance(packet, LLMRelevanceFilterResponse): + response.llm_chunks_indices = packet.relevant_chunk_indices + + response.answer = answer + if answer: + response.answer_citationless = remove_answer_citations(answer) + + return response diff --git a/backend/ee/danswer/server/query_and_chat/models.py b/backend/ee/danswer/server/query_and_chat/models.py 
index ec5ec5ac411..b0ce553ebe0 100644 --- a/backend/ee/danswer/server/query_and_chat/models.py +++ b/backend/ee/danswer/server/query_and_chat/models.py @@ -1,9 +1,23 @@ from pydantic import BaseModel +from pydantic import Field from danswer.configs.constants import DocumentSource +from danswer.one_shot_answer.models import ThreadMessage +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import SearchType from danswer.search.models import ChunkContext +from danswer.search.models import RerankingDetails from danswer.search.models import RetrievalDetails +from danswer.server.manage.models import StandardAnswer + + +class StandardAnswerRequest(BaseModel): + message: str + slack_bot_categories: list[str] + + +class StandardAnswerResponse(BaseModel): + standard_answers: list[StandardAnswer] = Field(default_factory=list) class DocumentSearchRequest(ChunkContext): @@ -11,9 +25,9 @@ class DocumentSearchRequest(ChunkContext): search_type: SearchType retrieval_options: RetrievalDetails recency_bias_multiplier: float = 1.0 - # This is to forcibly skip (or run) the step, if None it uses the system defaults - skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None + evaluation_type: LLMEvaluationType + # None to use system defaults for reranking + rerank_settings: RerankingDetails | None = None class BasicCreateChatMessageRequest(ChunkContext): @@ -33,12 +47,24 @@ class BasicCreateChatMessageRequest(ChunkContext): search_doc_ids: list[int] | None = None +class BasicCreateChatMessageWithHistoryRequest(ChunkContext): + # Last element is the new query. All previous elements are historical context + messages: list[ThreadMessage] + prompt_id: int | None + persona_id: int + retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) + query_override: str | None = None + skip_rerank: bool | None = None + + class SimpleDoc(BaseModel): + id: str semantic_identifier: str link: str | None blurb: str match_highlights: list[str] source_type: DocumentSource + metadata: dict | None class ChatBasicResponse(BaseModel): @@ -48,3 +74,4 @@ class ChatBasicResponse(BaseModel): simple_search_docs: list[SimpleDoc] | None = None error_msg: str | None = None message_id: int | None = None + llm_chunks_indices: list[int] | None = None diff --git a/backend/ee/danswer/server/query_and_chat/query_backend.py b/backend/ee/danswer/server/query_and_chat/query_backend.py index f6cf7297a4f..aef3648220e 100644 --- a/backend/ee/danswer/server/query_and_chat/query_backend.py +++ b/backend/ee/danswer/server/query_and_chat/query_backend.py @@ -1,10 +1,14 @@ from fastapi import APIRouter from fastapi import Depends +from fastapi import HTTPException from pydantic import BaseModel from sqlalchemy.orm import Session from danswer.auth.users import current_user from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE +from danswer.danswerbot.slack.handlers.handle_standard_answers import ( + oneoff_standard_answers, +) from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.persona import get_persona_by_id @@ -23,8 +27,11 @@ from danswer.search.pipeline import SearchPipeline from danswer.search.utils import dedupe_documents from danswer.search.utils import drop_llm_indices +from danswer.search.utils import relevant_sections_to_indices from danswer.utils.logger import setup_logger from ee.danswer.server.query_and_chat.models import DocumentSearchRequest +from ee.danswer.server.query_and_chat.models import 
StandardAnswerRequest +from ee.danswer.server.query_and_chat.models import StandardAnswerResponse logger = setup_logger() @@ -44,9 +51,10 @@ def handle_search_request( ) -> DocumentSearchResponse: """Simple search endpoint, does not create a new message or records in the DB""" query = search_request.message - logger.info(f"Received document search query: {query}") + logger.notice(f"Received document search query: {query}") llm, fast_llm = get_default_llms() + search_pipeline = SearchPipeline( search_request=SearchRequest( query=query, @@ -56,8 +64,8 @@ def handle_search_request( persona=None, # For simplicity, default settings should be good for this search offset=search_request.retrieval_options.offset, limit=search_request.retrieval_options.limit, - skip_rerank=search_request.skip_rerank, - skip_llm_chunk_filter=search_request.skip_llm_chunk_filter, + rerank_settings=search_request.rerank_settings, + evaluation_type=search_request.evaluation_type, chunks_above=search_request.chunks_above, chunks_below=search_request.chunks_below, full_doc=search_request.full_doc, @@ -69,8 +77,7 @@ def handle_search_request( bypass_acl=False, ) top_sections = search_pipeline.reranked_sections - # If using surrounding context or full doc, this will be empty - relevant_section_indices = search_pipeline.relevant_section_indices + relevance_sections = search_pipeline.section_relevance top_docs = [ SavedSearchDocWithContent( document_id=section.center_chunk.document_id, @@ -99,19 +106,22 @@ def handle_search_request( # Deduping happens at the last step to avoid harming quality by dropping content early on deduped_docs = top_docs dropped_inds = None + if search_request.retrieval_options.dedupe_docs: deduped_docs, dropped_inds = dedupe_documents(top_docs) + llm_indices = relevant_sections_to_indices( + relevance_sections=relevance_sections, items=deduped_docs + ) + if dropped_inds: - relevant_section_indices = drop_llm_indices( - llm_indices=relevant_section_indices, + llm_indices = drop_llm_indices( + llm_indices=llm_indices, search_docs=deduped_docs, dropped_indices=dropped_inds, ) - return DocumentSearchResponse( - top_documents=deduped_docs, llm_indices=relevant_section_indices - ) + return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices) @basic_router.post("/answer-with-quote") @@ -121,7 +131,7 @@ def get_answer_with_quote( db_session: Session = Depends(get_session), ) -> OneShotQAResponse: query = query_request.messages[0].message - logger.info(f"Received query for one shot answer API with quotes: {query}") + logger.notice(f"Received query for one shot answer API with quotes: {query}") persona = get_persona_by_id( persona_id=query_request.persona_id, @@ -155,3 +165,21 @@ def get_answer_with_quote( ) return answer_details + + +@basic_router.get("/standard-answer") +def get_standard_answer( + request: StandardAnswerRequest, + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> StandardAnswerResponse: + try: + standard_answers = oneoff_standard_answers( + message=request.message, + slack_bot_categories=request.slack_bot_categories, + db_session=db_session, + ) + return StandardAnswerResponse(standard_answers=standard_answers) + except Exception as e: + logger.error(f"Error in get_standard_answer: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail="An internal server error occurred") diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py index c01c82794e0..ed532a85603 
100644 --- a/backend/ee/danswer/server/query_history/api.py +++ b/backend/ee/danswer/server/query_history/api.py @@ -12,7 +12,6 @@ from pydantic import BaseModel from sqlalchemy.orm import Session -import danswer.db.models as db_models from danswer.auth.users import current_admin_user from danswer.auth.users import get_display_email from danswer.chat.chat_utils import create_chat_chain @@ -22,9 +21,9 @@ from danswer.db.engine import get_session from danswer.db.models import ChatMessage from danswer.db.models import ChatSession +from danswer.db.models import User from ee.danswer.db.query_history import fetch_chat_sessions_eagerly_by_time - router = APIRouter() @@ -103,6 +102,10 @@ class ChatSessionSnapshot(BaseModel): class QuestionAnswerPairSnapshot(BaseModel): + chat_session_id: int + # 1-indexed message number in the chat_session + # e.g. the first message pair in the chat_session is 1, the second is 2, etc. + message_pair_num: int user_message: str ai_response: str retrieved_documents: list[AbridgedSearchDoc] @@ -128,6 +131,8 @@ def from_chat_session_snapshot( return [ cls( + chat_session_id=chat_session_snapshot.id, + message_pair_num=ind + 1, user_message=user_message.message, ai_response=ai_message.message, retrieved_documents=ai_message.documents, @@ -137,11 +142,13 @@ def from_chat_session_snapshot( user_email=get_display_email(chat_session_snapshot.user_email), time_created=user_message.time_created, ) - for user_message, ai_message in message_pairs + for ind, (user_message, ai_message) in enumerate(message_pairs) ] def to_json(self) -> dict[str, str]: return { + "chat_session_id": str(self.chat_session_id), + "message_pair_num": str(self.message_pair_num), "user_message": self.user_message, "ai_response": self.ai_response, "retrieved_documents": "|".join( @@ -303,7 +310,7 @@ def get_chat_session_history( feedback_type: QAFeedbackType | None = None, start: datetime | None = None, end: datetime | None = None, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[ChatSessionMinimal]: return fetch_and_process_chat_session_history_minimal( @@ -320,7 +327,7 @@ def get_chat_session_history( @router.get("/admin/chat-session-history/{chat_session_id}") def get_chat_session_admin( chat_session_id: int, - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ChatSessionSnapshot: try: @@ -349,7 +356,7 @@ def get_chat_session_admin( @router.get("/admin/query-history-csv") def get_query_history_as_csv( - _: db_models.User | None = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> StreamingResponse: complete_chat_session_history = fetch_and_process_chat_session_history( @@ -369,7 +376,7 @@ def get_query_history_as_csv( # Create an in-memory text stream stream = io.StringIO() writer = csv.DictWriter( - stream, fieldnames=list(QuestionAnswerPairSnapshot.__fields__.keys()) + stream, fieldnames=list(QuestionAnswerPairSnapshot.model_fields.keys()) ) writer.writeheader() for row in question_answer_pairs: diff --git a/backend/ee/danswer/server/saml.py b/backend/ee/danswer/server/saml.py index 5fe57efe0ea..5bc62e98d61 100644 --- a/backend/ee/danswer/server/saml.py +++ b/backend/ee/danswer/server/saml.py @@ -50,7 +50,7 @@ async def upsert_saml_user(email: str) -> User: try: return await user_manager.get_by_email(email) except 
exceptions.UserNotExists: - logger.info("Creating user from SAML login") + logger.notice("Creating user from SAML login") user_count = await get_user_count() role = UserRole.ADMIN if user_count == 0 else UserRole.BASIC @@ -76,9 +76,13 @@ async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]: if request.client is None: raise ValueError("Invalid request for SAML") + # Use X-Forwarded headers if available + http_host = request.headers.get("X-Forwarded-Host") or request.client.host + server_port = request.headers.get("X-Forwarded-Port") or request.url.port + rv: dict[str, Any] = { - "http_host": request.client.host, - "server_port": request.url.port, + "http_host": http_host, + "server_port": server_port, "script_name": request.url.path, "post_data": {}, "get_data": {}, @@ -126,16 +130,20 @@ async def saml_login_callback( ) if not auth.is_authenticated(): + detail = "Access denied. User was not authenticated" + logger.error(detail) raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail="Access denied. User was not Authenticated.", + detail=detail, ) user_email = auth.get_attribute("email") if not user_email: + detail = "SAML is not set up correctly, email attribute must be provided." + logger.error(detail) raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail="SAML is not set up correctly, email attribute must be provided.", + detail=detail, ) user_email = user_email[0] diff --git a/backend/ee/danswer/server/seeding.py b/backend/ee/danswer/server/seeding.py index 20c57facbe5..bbca5acc20a 100644 --- a/backend/ee/danswer/server/seeding.py +++ b/backend/ee/danswer/server/seeding.py @@ -4,10 +4,8 @@ from sqlalchemy.orm import Session from danswer.db.engine import get_session_context_manager -from danswer.db.llm import fetch_existing_llm_providers from danswer.db.llm import update_default_provider from danswer.db.llm import upsert_llm_provider -from danswer.db.persona import get_personas from danswer.db.persona import upsert_persona from danswer.search.enums import RecencyBiasSetting from danswer.server.features.persona.models import CreatePersonaRequest @@ -15,13 +13,14 @@ from danswer.server.settings.models import Settings from danswer.server.settings.store import store_settings as store_base_settings from danswer.utils.logger import setup_logger +from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings +from ee.danswer.server.enterprise_settings.store import store_analytics_script from ee.danswer.server.enterprise_settings.store import ( store_settings as store_ee_settings, ) from ee.danswer.server.enterprise_settings.store import upload_logo - logger = setup_logger() _SEED_CONFIG_ENV_VAR_NAME = "ENV_SEED_CONFIGURATION" @@ -30,10 +29,12 @@ class SeedConfiguration(BaseModel): llms: list[LLMProviderUpsertRequest] | None = None admin_user_emails: list[str] | None = None - seeded_name: str | None = None seeded_logo_path: str | None = None personas: list[CreatePersonaRequest] | None = None settings: Settings | None = None + enterprise_settings: EnterpriseSettings | None = None + # Use existing `CUSTOM_ANALYTICS_SECRET_KEY` for reference + analytics_script_path: str | None = None def _parse_env() -> SeedConfiguration | None: @@ -47,71 +48,89 @@ def _parse_env() -> SeedConfiguration | None: def _seed_llms( db_session: Session, llm_upsert_requests: list[LLMProviderUpsertRequest] ) -> None: - # don't seed LLMs if we've already done this - existing_llms = 
fetch_existing_llm_providers(db_session) - if existing_llms: - return - - logger.info("Seeding LLMs") - seeded_providers = [ - upsert_llm_provider(db_session, llm_upsert_request) - for llm_upsert_request in llm_upsert_requests - ] - update_default_provider(db_session, seeded_providers[0].id) + if llm_upsert_requests: + logger.notice("Seeding LLMs") + seeded_providers = [ + upsert_llm_provider(db_session, llm_upsert_request) + for llm_upsert_request in llm_upsert_requests + ] + update_default_provider(db_session, seeded_providers[0].id) def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> None: - # don't seed personas if we've already done this - existing_personas = get_personas( - user_id=None, # Admin view - db_session=db_session, - include_default=True, - include_slack_bot_personas=True, - include_deleted=False, - ) - if existing_personas: - return - - logger.info("Seeding Personas") - for persona in personas: - upsert_persona( - user=None, # Seeding is done as admin - name=persona.name, - description=persona.description, - num_chunks=persona.num_chunks if persona.num_chunks is not None else 0.0, - llm_relevance_filter=persona.llm_relevance_filter, - llm_filter_extraction=persona.llm_filter_extraction, - recency_bias=RecencyBiasSetting.AUTO, - prompt_ids=persona.prompt_ids, - document_set_ids=persona.document_set_ids, - llm_model_provider_override=persona.llm_model_provider_override, - llm_model_version_override=persona.llm_model_version_override, - starter_messages=persona.starter_messages, - is_public=persona.is_public, - db_session=db_session, - tool_ids=persona.tool_ids, - ) + if personas: + logger.notice("Seeding Personas") + for persona in personas: + upsert_persona( + user=None, # Seeding is done as admin + name=persona.name, + description=persona.description, + num_chunks=persona.num_chunks + if persona.num_chunks is not None + else 0.0, + llm_relevance_filter=persona.llm_relevance_filter, + llm_filter_extraction=persona.llm_filter_extraction, + recency_bias=RecencyBiasSetting.AUTO, + prompt_ids=persona.prompt_ids, + document_set_ids=persona.document_set_ids, + llm_model_provider_override=persona.llm_model_provider_override, + llm_model_version_override=persona.llm_model_version_override, + starter_messages=persona.starter_messages, + is_public=persona.is_public, + db_session=db_session, + tool_ids=persona.tool_ids, + ) def _seed_settings(settings: Settings) -> None: - logger.info("Seeding Settings") + logger.notice("Seeding Settings") try: settings.check_validity() store_base_settings(settings) - logger.info("Successfully seeded Settings") + logger.notice("Successfully seeded Settings") except ValueError as e: logger.error(f"Failed to seed Settings: {str(e)}") +def _seed_enterprise_settings(seed_config: SeedConfiguration) -> None: + if seed_config.enterprise_settings is not None: + logger.notice("Seeding enterprise settings") + store_ee_settings(seed_config.enterprise_settings) + + +def _seed_logo(db_session: Session, logo_path: str | None) -> None: + if logo_path: + logger.notice("Uploading logo") + upload_logo(db_session=db_session, file=logo_path) + + +def _seed_analytics_script(seed_config: SeedConfiguration) -> None: + custom_analytics_secret_key = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY") + if seed_config.analytics_script_path and custom_analytics_secret_key: + logger.notice("Seeding analytics script") + try: + with open(seed_config.analytics_script_path, "r") as file: + script_content = file.read() + analytics_script = 
AnalyticsScriptUpload( + script=script_content, secret_key=custom_analytics_secret_key + ) + store_analytics_script(analytics_script) + except FileNotFoundError: + logger.error( + f"Analytics script file not found: {seed_config.analytics_script_path}" + ) + except ValueError as e: + logger.error(f"Failed to seed analytics script: {str(e)}") + + def get_seed_config() -> SeedConfiguration | None: return _parse_env() def seed_db() -> None: seed_config = _parse_env() - if seed_config is None: - logger.info("No seeding configuration file passed") + logger.debug("No seeding configuration file passed") return with get_session_context_manager() as db_session: @@ -122,16 +141,6 @@ def seed_db() -> None: if seed_config.settings is not None: _seed_settings(seed_config.settings) - is_seeded_logo = ( - upload_logo(db_session=db_session, file=seed_config.seeded_logo_path) - if seed_config.seeded_logo_path - else False - ) - seeded_name = seed_config.seeded_name - - if is_seeded_logo or seeded_name: - logger.info("Seeding enterprise settings") - seeded_settings = EnterpriseSettings( - application_name=seeded_name, use_custom_logo=is_seeded_logo - ) - store_ee_settings(seeded_settings) + _seed_logo(db_session, seed_config.seeded_logo_path) + _seed_enterprise_settings(seed_config) + _seed_analytics_script(seed_config) diff --git a/backend/ee/danswer/server/token_rate_limits/api.py b/backend/ee/danswer/server/token_rate_limits/api.py index aac3ebb16c0..97f1f15faed 100644 --- a/backend/ee/danswer/server/token_rate_limits/api.py +++ b/backend/ee/danswer/server/token_rate_limits/api.py @@ -5,14 +5,15 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user +from danswer.auth.users import current_curator_or_admin_user from danswer.db.engine import get_session from danswer.db.models import User from danswer.server.query_and_chat.token_limit import any_rate_limit_exists from danswer.server.token_rate_limits.models import TokenRateLimitArgs from danswer.server.token_rate_limits.models import TokenRateLimitDisplay -from ee.danswer.db.token_limit import fetch_all_user_group_token_rate_limits from ee.danswer.db.token_limit import fetch_all_user_group_token_rate_limits_by_group from ee.danswer.db.token_limit import fetch_all_user_token_rate_limits +from ee.danswer.db.token_limit import fetch_user_group_token_rate_limits from ee.danswer.db.token_limit import insert_user_group_token_rate_limit from ee.danswer.db.token_limit import insert_user_token_rate_limit @@ -45,13 +46,13 @@ def get_all_group_token_limit_settings( @router.get("/user-group/{group_id}") def get_group_token_limit_settings( group_id: int, - _: User | None = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> list[TokenRateLimitDisplay]: return [ TokenRateLimitDisplay.from_db(token_rate_limit) - for token_rate_limit in fetch_all_user_group_token_rate_limits( - db_session, group_id + for token_rate_limit in fetch_user_group_token_rate_limits( + db_session, group_id, user ) ] diff --git a/backend/ee/danswer/server/user_group/api.py b/backend/ee/danswer/server/user_group/api.py index 6b5163d5417..e18487d5491 100644 --- a/backend/ee/danswer/server/user_group/api.py +++ b/backend/ee/danswer/server/user_group/api.py @@ -4,13 +4,18 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session -import danswer.db.models as db_models from danswer.auth.users import current_admin_user +from danswer.auth.users import 
current_curator_or_admin_user from danswer.db.engine import get_session +from danswer.db.models import User +from danswer.db.models import UserRole from ee.danswer.db.user_group import fetch_user_groups +from ee.danswer.db.user_group import fetch_user_groups_for_user from ee.danswer.db.user_group import insert_user_group from ee.danswer.db.user_group import prepare_user_group_for_deletion +from ee.danswer.db.user_group import update_user_curator_relationship from ee.danswer.db.user_group import update_user_group +from ee.danswer.server.user_group.models import SetCuratorRequest from ee.danswer.server.user_group.models import UserGroup from ee.danswer.server.user_group.models import UserGroupCreate from ee.danswer.server.user_group.models import UserGroupUpdate @@ -20,17 +25,24 @@ @router.get("/admin/user-group") def list_user_groups( - _: db_models.User = Depends(current_admin_user), + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> list[UserGroup]: - user_groups = fetch_user_groups(db_session, only_current=False) + if user is None or user.role == UserRole.ADMIN: + user_groups = fetch_user_groups(db_session, only_current=False) + else: + user_groups = fetch_user_groups_for_user( + db_session=db_session, + user_id=user.id, + only_curator_groups=user.role == UserRole.CURATOR, + ) return [UserGroup.from_model(user_group) for user_group in user_groups] @router.post("/admin/user-group") def create_user_group( user_group: UserGroupCreate, - _: db_models.User = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> UserGroup: try: @@ -47,13 +59,35 @@ def create_user_group( @router.patch("/admin/user-group/{user_group_id}") def patch_user_group( user_group_id: int, - user_group: UserGroupUpdate, - _: db_models.User = Depends(current_admin_user), + user_group_update: UserGroupUpdate, + user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> UserGroup: try: return UserGroup.from_model( - update_user_group(db_session, user_group_id, user_group) + update_user_group( + db_session=db_session, + user=user, + user_group_id=user_group_id, + user_group_update=user_group_update, + ) + ) + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +@router.post("/admin/user-group/{user_group_id}/set-curator") +def set_user_curator( + user_group_id: int, + set_curator_request: SetCuratorRequest, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + try: + update_user_curator_relationship( + db_session=db_session, + user_group_id=user_group_id, + set_curator_request=set_curator_request, ) except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) @@ -62,7 +96,7 @@ def patch_user_group( @router.delete("/admin/user-group/{user_group_id}") def delete_user_group( user_group_id: int, - _: db_models.User = Depends(current_admin_user), + _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> None: try: diff --git a/backend/ee/danswer/server/user_group/models.py b/backend/ee/danswer/server/user_group/models.py index fafb73a3e09..077a217e932 100644 --- a/backend/ee/danswer/server/user_group/models.py +++ b/backend/ee/danswer/server/user_group/models.py @@ -16,6 +16,7 @@ class UserGroup(BaseModel): id: int name: str users: list[UserInfo] + curator_ids: list[UUID] cc_pairs: 
list[ConnectorCredentialPairDescriptor] document_sets: list[DocumentSet] personas: list[PersonaSnapshot] @@ -36,11 +37,17 @@ def from_model(cls, user_group_model: UserGroupModel) -> "UserGroup": is_verified=user.is_verified, role=user.role, preferences=UserPreferences( - chosen_assistants=user.chosen_assistants + default_model=user.default_model, + chosen_assistants=user.chosen_assistants, ), ) for user in user_group_model.users ], + curator_ids=[ + user.user_id + for user in user_group_model.user_group_relationships + if user.is_curator and user.user_id is not None + ], cc_pairs=[ ConnectorCredentialPairDescriptor( id=cc_pair_relationship.cc_pair.id, @@ -61,6 +68,7 @@ def from_model(cls, user_group_model: UserGroupModel) -> "UserGroup": personas=[ PersonaSnapshot.from_model(persona) for persona in user_group_model.personas + if not persona.deleted ], is_up_to_date=user_group_model.is_up_to_date, is_up_for_deletion=user_group_model.is_up_for_deletion, @@ -76,3 +84,8 @@ class UserGroupCreate(BaseModel): class UserGroupUpdate(BaseModel): user_ids: list[UUID] cc_pair_ids: list[int] + + +class SetCuratorRequest(BaseModel): + user_id: UUID + is_curator: bool diff --git a/backend/ee/danswer/user_groups/sync.py b/backend/ee/danswer/user_groups/sync.py index e33655ba2f9..e3bea192670 100644 --- a/backend/ee/danswer/user_groups/sync.py +++ b/backend/ee/danswer/user_groups/sync.py @@ -2,8 +2,8 @@ from danswer.access.access import get_access_for_documents from danswer.db.document import prepare_to_modify_documents -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.embedding_model import get_secondary_db_embedding_model +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import UpdateRequest @@ -47,13 +47,13 @@ def _sync_user_group_batch( def sync_user_groups(user_group_id: int, db_session: Session) -> None: """Sync the status of Postgres for the specified user group""" - db_embedding_model = get_current_db_embedding_model(db_session) - secondary_db_embedding_model = get_secondary_db_embedding_model(db_session) + search_settings = get_current_search_settings(db_session) + secondary_search_settings = get_secondary_search_settings(db_session) document_index = get_default_document_index( - primary_index_name=db_embedding_model.index_name, - secondary_index_name=secondary_db_embedding_model.index_name - if secondary_db_embedding_model + primary_index_name=search_settings.index_name, + secondary_index_name=secondary_search_settings.index_name + if secondary_search_settings else None, ) diff --git a/backend/model_server/constants.py b/backend/model_server/constants.py index bc842f5461e..d6991b40203 100644 --- a/backend/model_server/constants.py +++ b/backend/model_server/constants.py @@ -1 +1,30 @@ +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import EmbedTextType + + MODEL_WARM_UP_STRING = "hi " * 512 +DEFAULT_OPENAI_MODEL = "text-embedding-3-small" +DEFAULT_COHERE_MODEL = "embed-english-light-v3.0" +DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct" +DEFAULT_VERTEX_MODEL = "text-embedding-004" + + +class EmbeddingModelTextType: + PROVIDER_TEXT_TYPE_MAP = { + EmbeddingProvider.COHERE: { + EmbedTextType.QUERY: "search_query", + EmbedTextType.PASSAGE: "search_document", + }, + 
EmbeddingProvider.VOYAGE: { + EmbedTextType.QUERY: "query", + EmbedTextType.PASSAGE: "document", + }, + EmbeddingProvider.GOOGLE: { + EmbedTextType.QUERY: "RETRIEVAL_QUERY", + EmbedTextType.PASSAGE: "RETRIEVAL_DOCUMENT", + }, + } + + @staticmethod + def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str: + return EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider][text_type] diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py index ee97ded7843..38bf4b077fa 100644 --- a/backend/model_server/custom_models.py +++ b/backend/model_server/custom_models.py @@ -1,75 +1,200 @@ -from typing import Optional - -import numpy as np -import tensorflow as tf # type: ignore +import torch +import torch.nn.functional as F from fastapi import APIRouter +from huggingface_hub import snapshot_download # type: ignore from transformers import AutoTokenizer # type: ignore -from transformers import TFDistilBertForSequenceClassification +from transformers import BatchEncoding +from danswer.utils.logger import setup_logger from model_server.constants import MODEL_WARM_UP_STRING +from model_server.danswer_torch_model import HybridClassifier from model_server.utils import simple_log_function_time from shared_configs.configs import INDEXING_ONLY -from shared_configs.configs import INTENT_MODEL_CONTEXT_SIZE +from shared_configs.configs import INTENT_MODEL_TAG from shared_configs.configs import INTENT_MODEL_VERSION from shared_configs.model_server_models import IntentRequest from shared_configs.model_server_models import IntentResponse +logger = setup_logger() router = APIRouter(prefix="/custom") -_INTENT_TOKENIZER: Optional[AutoTokenizer] = None -_INTENT_MODEL: Optional[TFDistilBertForSequenceClassification] = None +_INTENT_TOKENIZER: AutoTokenizer | None = None +_INTENT_MODEL: HybridClassifier | None = None -def get_intent_model_tokenizer( - model_name: str = INTENT_MODEL_VERSION, -) -> "AutoTokenizer": +def get_intent_model_tokenizer() -> AutoTokenizer: global _INTENT_TOKENIZER if _INTENT_TOKENIZER is None: - _INTENT_TOKENIZER = AutoTokenizer.from_pretrained(model_name) + # The tokenizer details are not uploaded to the HF hub since it's just the + # unmodified distilbert tokenizer. 
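+        # The HybridClassifier backbone is a stock DistilBERT, so the standard tokenizer is loaded directly.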
+ _INTENT_TOKENIZER = AutoTokenizer.from_pretrained("distilbert-base-uncased") return _INTENT_TOKENIZER def get_local_intent_model( - model_name: str = INTENT_MODEL_VERSION, - max_context_length: int = INTENT_MODEL_CONTEXT_SIZE, -) -> TFDistilBertForSequenceClassification: + model_name_or_path: str = INTENT_MODEL_VERSION, + tag: str = INTENT_MODEL_TAG, +) -> HybridClassifier: global _INTENT_MODEL - if _INTENT_MODEL is None or max_context_length != _INTENT_MODEL.max_seq_length: - _INTENT_MODEL = TFDistilBertForSequenceClassification.from_pretrained( - model_name - ) - _INTENT_MODEL.max_seq_length = max_context_length + if _INTENT_MODEL is None: + try: + # Calculate where the cache should be, then load from local if available + logger.notice(f"Loading model from local cache: {model_name_or_path}") + local_path = snapshot_download( + repo_id=model_name_or_path, revision=tag, local_files_only=True + ) + _INTENT_MODEL = HybridClassifier.from_pretrained(local_path) + logger.notice(f"Loaded model from local cache: {local_path}") + except Exception as e: + logger.warning(f"Failed to load model directly: {e}") + try: + # Attempt to download the model snapshot + logger.notice(f"Downloading model snapshot for {model_name_or_path}") + local_path = snapshot_download(repo_id=model_name_or_path, revision=tag) + _INTENT_MODEL = HybridClassifier.from_pretrained(local_path) + except Exception as e: + logger.error( + f"Failed to load model even after attempted snapshot download: {e}" + ) + raise return _INTENT_MODEL def warm_up_intent_model() -> None: + logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}") intent_tokenizer = get_intent_model_tokenizer() - inputs = intent_tokenizer( - MODEL_WARM_UP_STRING, return_tensors="tf", truncation=True, padding=True + tokens = intent_tokenizer( + MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True + ) + + intent_model = get_local_intent_model() + device = intent_model.device + intent_model( + query_ids=tokens["input_ids"].to(device), + query_mask=tokens["attention_mask"].to(device), ) - get_local_intent_model()(inputs) @simple_log_function_time() -def classify_intent(query: str) -> list[float]: - tokenizer = get_intent_model_tokenizer() +def run_inference(tokens: BatchEncoding) -> tuple[list[float], list[float]]: intent_model = get_local_intent_model() - model_input = tokenizer(query, return_tensors="tf", truncation=True, padding=True) + device = intent_model.device + + outputs = intent_model( + query_ids=tokens["input_ids"].to(device), + query_mask=tokens["attention_mask"].to(device), + ) + + token_logits = outputs["token_logits"] + intent_logits = outputs["intent_logits"] + + # Move tensors to CPU before applying softmax and converting to numpy + intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0] + token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0] + + # Extract the probabilities for the positive class (index 1) for each token + token_positive_probs = token_probabilities[:, 1].tolist() + + return intent_probabilities.tolist(), token_positive_probs + + +def map_keywords( + input_ids: torch.Tensor, tokenizer: AutoTokenizer, is_keyword: list[bool] +) -> list[str]: + tokens = tokenizer.convert_ids_to_tokens(input_ids) + + if not len(tokens) == len(is_keyword): + raise ValueError("Length of tokens and keyword predictions must match") + + if input_ids[0] == tokenizer.cls_token_id: + tokens = tokens[1:] + is_keyword = is_keyword[1:] + + if input_ids[-1] == tokenizer.sep_token_id: + tokens = 
tokens[:-1] + is_keyword = is_keyword[:-1] + + unk_token = tokenizer.unk_token + if unk_token in tokens: + raise ValueError("Unknown token detected in the input") + + keywords = [] + current_keyword = "" + + for ind, token in enumerate(tokens): + if is_keyword[ind]: + if token.startswith("##"): + current_keyword += token[2:] + else: + if current_keyword: + keywords.append(current_keyword) + current_keyword = token + else: + # If mispredicted a later token of a keyword, add it to the current keyword + # to complete it + if current_keyword: + if len(current_keyword) > 2 and current_keyword.startswith("##"): + current_keyword = current_keyword[2:] + + else: + keywords.append(current_keyword) + current_keyword = "" + + if current_keyword: + keywords.append(current_keyword) + + return keywords + + +def clean_keywords(keywords: list[str]) -> list[str]: + cleaned_words = [] + for word in keywords: + word = word[:-2] if word.endswith("'s") else word + word = word.replace("/", " ") + word = word.replace("'", "").replace('"', "") + cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()]) + return cleaned_words + + +def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]: + tokenizer = get_intent_model_tokenizer() + model_input = tokenizer( + intent_req.query, return_tensors="pt", truncation=False, padding=False + ) + + if len(model_input.input_ids[0]) > 512: + # If the user text is too long, assume it is semantic and keep all words + return True, intent_req.query.split() + + intent_probs, token_probs = run_inference(model_input) + + is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold + + keyword_preds = [ + token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs + ] + + try: + keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds) + except Exception as e: + logger.error( + f"Failed to extract keywords for query: {intent_req.query} due to {e}" + ) + # Fallback to keeping all words + keywords = intent_req.query.split() - predictions = intent_model(model_input)[0] - probabilities = tf.nn.softmax(predictions, axis=-1) + cleaned_keywords = clean_keywords(keywords) - class_percentages = np.round(probabilities.numpy() * 100, 2) - return list(class_percentages.tolist()[0]) + return is_keyword_sequence, cleaned_keywords -@router.post("/intent-model") -async def process_intent_request( +@router.post("/query-analysis") +async def process_analysis_request( intent_request: IntentRequest, ) -> IntentResponse: if INDEXING_ONLY: raise RuntimeError("Indexing model server should not call intent endpoint") - class_percentages = classify_intent(intent_request.query) - return IntentResponse(class_probs=class_percentages) + is_keyword, keywords = run_analysis(intent_request) + return IntentResponse(is_keyword=is_keyword, keywords=keywords) diff --git a/backend/model_server/danswer_torch_model.py b/backend/model_server/danswer_torch_model.py new file mode 100644 index 00000000000..28554a4fd2d --- /dev/null +++ b/backend/model_server/danswer_torch_model.py @@ -0,0 +1,74 @@ +import json +import os + +import torch +import torch.nn as nn +from transformers import DistilBertConfig # type: ignore +from transformers import DistilBertModel + + +class HybridClassifier(nn.Module): + def __init__(self) -> None: + super().__init__() + config = DistilBertConfig() + self.distilbert = DistilBertModel(config) + + # Keyword tokenwise binary classification layer + self.keyword_classifier = nn.Linear(self.distilbert.config.dim, 2) + 
+ # Intent Classifier layers + self.pre_classifier = nn.Linear( + self.distilbert.config.dim, self.distilbert.config.dim + ) + self.intent_classifier = nn.Linear(self.distilbert.config.dim, 2) + self.dropout = nn.Dropout(self.distilbert.config.seq_classif_dropout) + + self.device = torch.device("cpu") + + def forward( + self, + query_ids: torch.Tensor, + query_mask: torch.Tensor, + ) -> dict[str, torch.Tensor]: + outputs = self.distilbert(input_ids=query_ids, attention_mask=query_mask) + sequence_output = outputs.last_hidden_state + + # Intent classification on the CLS token + cls_token_state = sequence_output[:, 0, :] + pre_classifier_out = self.pre_classifier(cls_token_state) + dropout_out = self.dropout(pre_classifier_out) + intent_logits = self.intent_classifier(dropout_out) + + # Keyword classification on all tokens + token_logits = self.keyword_classifier(sequence_output) + + return {"intent_logits": intent_logits, "token_logits": token_logits} + + @classmethod + def from_pretrained(cls, load_directory: str) -> "HybridClassifier": + model_path = os.path.join(load_directory, "pytorch_model.bin") + config_path = os.path.join(load_directory, "config.json") + + with open(config_path, "r") as f: + config = json.load(f) + model = cls(**config) + + if torch.backends.mps.is_available(): + # Apple silicon GPU + device = torch.device("mps") + elif torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + + model.load_state_dict(torch.load(model_path, map_location=device)) + model = model.to(device) + + model.device = device + + model.eval() + # Eval doesn't set requires_grad to False, do it manually to save memory and have faster inference + for param in model.parameters(): + param.requires_grad = False + + return model diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py index 705386a8c4b..4e97bd00f27 100644 --- a/backend/model_server/encoders.py +++ b/backend/model_server/encoders.py @@ -1,28 +1,198 @@ -import gc +import json +from typing import Any from typing import Optional +import openai +import vertexai # type: ignore +import voyageai # type: ignore +from cohere import Client as CohereClient from fastapi import APIRouter from fastapi import HTTPException +from google.oauth2 import service_account # type: ignore +from retry import retry from sentence_transformers import CrossEncoder # type: ignore from sentence_transformers import SentenceTransformer # type: ignore +from vertexai.language_models import TextEmbeddingInput # type: ignore +from vertexai.language_models import TextEmbeddingModel # type: ignore from danswer.utils.logger import setup_logger -from model_server.constants import MODEL_WARM_UP_STRING +from model_server.constants import DEFAULT_COHERE_MODEL +from model_server.constants import DEFAULT_OPENAI_MODEL +from model_server.constants import DEFAULT_VERTEX_MODEL +from model_server.constants import DEFAULT_VOYAGE_MODEL +from model_server.constants import EmbeddingModelTextType +from model_server.constants import EmbeddingProvider from model_server.utils import simple_log_function_time -from shared_configs.configs import CROSS_EMBED_CONTEXT_SIZE -from shared_configs.configs import CROSS_ENCODER_MODEL_ENSEMBLE from shared_configs.configs import INDEXING_ONLY +from shared_configs.enums import EmbedTextType +from shared_configs.enums import RerankerProvider +from shared_configs.model_server_models import Embedding from shared_configs.model_server_models import EmbedRequest from 
shared_configs.model_server_models import EmbedResponse from shared_configs.model_server_models import RerankRequest from shared_configs.model_server_models import RerankResponse +from shared_configs.utils import batch_list + logger = setup_logger() router = APIRouter(prefix="/encoder") _GLOBAL_MODELS_DICT: dict[str, "SentenceTransformer"] = {} -_RERANK_MODELS: Optional[list["CrossEncoder"]] = None +_RERANK_MODEL: Optional["CrossEncoder"] = None + +# If we are not only indexing, dont want retry very long +_RETRY_DELAY = 10 if INDEXING_ONLY else 0.1 +_RETRY_TRIES = 10 if INDEXING_ONLY else 2 + +# OpenAI only allows 2048 embeddings to be computed at once +_OPENAI_MAX_INPUT_LEN = 2048 +# Cohere allows up to 96 embeddings in a single embedding calling +_COHERE_MAX_INPUT_LEN = 96 + + +def _initialize_client( + api_key: str, provider: EmbeddingProvider, model: str | None = None +) -> Any: + if provider == EmbeddingProvider.OPENAI: + return openai.OpenAI(api_key=api_key) + elif provider == EmbeddingProvider.COHERE: + return CohereClient(api_key=api_key) + elif provider == EmbeddingProvider.VOYAGE: + return voyageai.Client(api_key=api_key) + elif provider == EmbeddingProvider.GOOGLE: + credentials = service_account.Credentials.from_service_account_info( + json.loads(api_key) + ) + project_id = json.loads(api_key)["project_id"] + vertexai.init(project=project_id, credentials=credentials) + return TextEmbeddingModel.from_pretrained(model or DEFAULT_VERTEX_MODEL) + else: + raise ValueError(f"Unsupported provider: {provider}") + + +class CloudEmbedding: + def __init__( + self, + api_key: str, + provider: EmbeddingProvider, + # Only for Google as is needed on client setup + model: str | None = None, + ) -> None: + self.provider = provider + self.client = _initialize_client(api_key, self.provider, model) + + def _embed_openai(self, texts: list[str], model: str | None) -> list[Embedding]: + if model is None: + model = DEFAULT_OPENAI_MODEL + + # OpenAI does not seem to provide truncation option, however + # the context lengths used by Danswer currently are smaller than the max token length + # for OpenAI embeddings so it's not a big deal + final_embeddings: list[Embedding] = [] + try: + for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN): + response = self.client.embeddings.create(input=text_batch, model=model) + final_embeddings.extend( + [embedding.embedding for embedding in response.data] + ) + return final_embeddings + except Exception as e: + error_string = ( + f"Error embedding text with OpenAI: {str(e)} \n" + f"Model: {model} \n" + f"Provider: {self.provider} \n" + f"Texts: {texts}" + ) + logger.error(error_string) + raise RuntimeError(error_string) + + def _embed_cohere( + self, texts: list[str], model: str | None, embedding_type: str + ) -> list[Embedding]: + if model is None: + model = DEFAULT_COHERE_MODEL + + final_embeddings: list[Embedding] = [] + for text_batch in batch_list(texts, _COHERE_MAX_INPUT_LEN): + # Does not use the same tokenizer as the Danswer API server but it's approximately the same + # empirically it's only off by a very few tokens so it's not a big deal + response = self.client.embed( + texts=text_batch, + model=model, + input_type=embedding_type, + truncate="END", + ) + final_embeddings.extend(response.embeddings) + return final_embeddings + + def _embed_voyage( + self, texts: list[str], model: str | None, embedding_type: str + ) -> list[Embedding]: + if model is None: + model = DEFAULT_VOYAGE_MODEL + + # Similar to Cohere, the API server will do approximate size 
chunking + # it's acceptable to miss by a few tokens + response = self.client.embed( + texts, + model=model, + input_type=embedding_type, + truncation=True, # Also this is default + ) + return response.embeddings + + def _embed_vertex( + self, texts: list[str], model: str | None, embedding_type: str + ) -> list[Embedding]: + if model is None: + model = DEFAULT_VERTEX_MODEL + + embeddings = self.client.get_embeddings( + [ + TextEmbeddingInput( + text, + embedding_type, + ) + for text in texts + ], + auto_truncate=True, # Also this is default + ) + return [embedding.values for embedding in embeddings] + + @retry(tries=_RETRY_TRIES, delay=_RETRY_DELAY) + def embed( + self, + *, + texts: list[str], + text_type: EmbedTextType, + model_name: str | None = None, + ) -> list[Embedding]: + try: + if self.provider == EmbeddingProvider.OPENAI: + return self._embed_openai(texts, model_name) + + embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type) + if self.provider == EmbeddingProvider.COHERE: + return self._embed_cohere(texts, model_name, embedding_type) + elif self.provider == EmbeddingProvider.VOYAGE: + return self._embed_voyage(texts, model_name, embedding_type) + elif self.provider == EmbeddingProvider.GOOGLE: + return self._embed_vertex(texts, model_name, embedding_type) + else: + raise ValueError(f"Unsupported provider: {self.provider}") + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error embedding text with {self.provider}: {str(e)}", + ) + + @staticmethod + def create( + api_key: str, provider: EmbeddingProvider, model: str | None = None + ) -> "CloudEmbedding": + logger.debug(f"Creating Embedding instance for provider: {provider}") + return CloudEmbedding(api_key, provider, model) def get_embedding_model( @@ -37,8 +207,15 @@ def get_embedding_model( _GLOBAL_MODELS_DICT = {} if model_name not in _GLOBAL_MODELS_DICT: - logger.info(f"Loading {model_name}") - model = SentenceTransformer(model_name) + logger.notice(f"Loading {model_name}") + # Some model architectures that aren't built into the Transformers or Sentence + # Transformer need to be downloaded to be loaded locally. 
This does not mean + # data is sent to remote servers for inference, however the remote code can + # be fairly arbitrary so only use trusted models + model = SentenceTransformer( + model_name_or_path=model_name, + trust_remote_code=True, + ) model.max_seq_length = max_context_length _GLOBAL_MODELS_DICT[model_name] = model elif max_context_length != _GLOBAL_MODELS_DICT[model_name].max_seq_length: @@ -47,88 +224,170 @@ def get_embedding_model( return _GLOBAL_MODELS_DICT[model_name] -def get_local_reranking_model_ensemble( - model_names: list[str] = CROSS_ENCODER_MODEL_ENSEMBLE, - max_context_length: int = CROSS_EMBED_CONTEXT_SIZE, -) -> list[CrossEncoder]: - global _RERANK_MODELS - if _RERANK_MODELS is None or max_context_length != _RERANK_MODELS[0].max_length: - del _RERANK_MODELS - gc.collect() - - _RERANK_MODELS = [] - for model_name in model_names: - logger.info(f"Loading {model_name}") - model = CrossEncoder(model_name) - model.max_length = max_context_length - _RERANK_MODELS.append(model) - return _RERANK_MODELS - - -def warm_up_cross_encoders() -> None: - logger.info(f"Warming up Cross-Encoders: {CROSS_ENCODER_MODEL_ENSEMBLE}") - - cross_encoders = get_local_reranking_model_ensemble() - [ - cross_encoder.predict((MODEL_WARM_UP_STRING, MODEL_WARM_UP_STRING)) - for cross_encoder in cross_encoders - ] +def get_local_reranking_model( + model_name: str, +) -> CrossEncoder: + global _RERANK_MODEL + if _RERANK_MODEL is None: + logger.notice(f"Loading {model_name}") + model = CrossEncoder(model_name) + _RERANK_MODEL = model + return _RERANK_MODEL @simple_log_function_time() def embed_text( texts: list[str], - model_name: str, + text_type: EmbedTextType, + model_name: str | None, max_context_length: int, normalize_embeddings: bool, -) -> list[list[float]]: - model = get_embedding_model( - model_name=model_name, max_context_length=max_context_length - ) - embeddings = model.encode(texts, normalize_embeddings=normalize_embeddings) + api_key: str | None, + provider_type: EmbeddingProvider | None, + prefix: str | None, +) -> list[Embedding]: + if not all(texts): + raise ValueError("Empty strings are not allowed for embedding.") + + # Third party API based embedding model + if not texts: + raise ValueError("No texts provided for embedding.") + elif provider_type is not None: + logger.debug(f"Embedding text with provider: {provider_type}") + if api_key is None: + raise RuntimeError("API key not provided for cloud model") + + if prefix: + # This may change in the future if some providers require the user + # to manually append a prefix but this is not the case currently + raise ValueError( + "Prefix string is not valid for cloud models. " + "Cloud models take an explicit text type instead." 
+ ) - if not isinstance(embeddings, list): - embeddings = embeddings.tolist() + cloud_model = CloudEmbedding( + api_key=api_key, provider=provider_type, model=model_name + ) + embeddings = cloud_model.embed( + texts=texts, + model_name=model_name, + text_type=text_type, + ) + + # Check for None values in embeddings + if any(embedding is None for embedding in embeddings): + error_message = "Embeddings contain None values\n" + error_message += "Corresponding texts:\n" + error_message += "\n".join(texts) + raise ValueError(error_message) + + elif model_name is not None: + prefixed_texts = [f"{prefix}{text}" for text in texts] if prefix else texts + + local_model = get_embedding_model( + model_name=model_name, max_context_length=max_context_length + ) + embeddings_vectors = local_model.encode( + prefixed_texts, normalize_embeddings=normalize_embeddings + ) + embeddings = [ + embedding if isinstance(embedding, list) else embedding.tolist() + for embedding in embeddings_vectors + ] + + else: + raise ValueError( + "Either model name or provider must be provided to run embeddings." + ) return embeddings @simple_log_function_time() -def calc_sim_scores(query: str, docs: list[str]) -> list[list[float]]: - cross_encoders = get_local_reranking_model_ensemble() - sim_scores = [ - encoder.predict([(query, doc) for doc in docs]).tolist() # type: ignore - for encoder in cross_encoders - ] - return sim_scores +def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]: + cross_encoder = get_local_reranking_model(model_name) + return cross_encoder.predict([(query, doc) for doc in docs]).tolist() # type: ignore + + +def cohere_rerank( + query: str, docs: list[str], model_name: str, api_key: str +) -> list[float]: + cohere_client = CohereClient(api_key=api_key) + response = cohere_client.rerank(query=query, documents=docs, model=model_name) + results = response.results + sorted_results = sorted(results, key=lambda item: item.index) + return [result.relevance_score for result in sorted_results] @router.post("/bi-encoder-embed") async def process_embed_request( embed_request: EmbedRequest, ) -> EmbedResponse: + if not embed_request.texts: + raise HTTPException(status_code=400, detail="No texts to be embedded") + elif not all(embed_request.texts): + raise ValueError("Empty strings are not allowed for embedding.") + try: + if embed_request.text_type == EmbedTextType.QUERY: + prefix = embed_request.manual_query_prefix + elif embed_request.text_type == EmbedTextType.PASSAGE: + prefix = embed_request.manual_passage_prefix + else: + prefix = None + embeddings = embed_text( texts=embed_request.texts, model_name=embed_request.model_name, max_context_length=embed_request.max_context_length, normalize_embeddings=embed_request.normalize_embeddings, + api_key=embed_request.api_key, + provider_type=embed_request.provider_type, + text_type=embed_request.text_type, + prefix=prefix, ) return EmbedResponse(embeddings=embeddings) except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + exception_detail = f"Error during embedding process:\n{str(e)}" + logger.exception(exception_detail) + raise HTTPException(status_code=500, detail=exception_detail) @router.post("/cross-encoder-scores") -async def process_rerank_request(embed_request: RerankRequest) -> RerankResponse: +async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse: """Cross encoders can be purely black box from the app perspective""" if INDEXING_ONLY: raise RuntimeError("Indexing model server should not 
call intent endpoint") - try: - sim_scores = calc_sim_scores( - query=embed_request.query, docs=embed_request.documents + if not rerank_request.documents or not rerank_request.query: + raise HTTPException( + status_code=400, detail="Missing documents or query for reranking" ) - return RerankResponse(scores=sim_scores) + if not all(rerank_request.documents): + raise ValueError("Empty documents cannot be reranked.") + + try: + if rerank_request.provider_type is None: + sim_scores = local_rerank( + query=rerank_request.query, + docs=rerank_request.documents, + model_name=rerank_request.model_name, + ) + return RerankResponse(scores=sim_scores) + elif rerank_request.provider_type == RerankerProvider.COHERE: + if rerank_request.api_key is None: + raise RuntimeError("Cohere Rerank Requires an API Key") + sim_scores = cohere_rerank( + query=rerank_request.query, + docs=rerank_request.documents, + model_name=rerank_request.model_name, + api_key=rerank_request.api_key, + ) + return RerankResponse(scores=sim_scores) + else: + raise ValueError(f"Unsupported provider: {rerank_request.provider_type}") except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + logger.exception(f"Error during reranking process:\n{str(e)}") + raise HTTPException( + status_code=500, detail="Failed to run Cross-Encoder reranking" + ) diff --git a/backend/model_server/main.py b/backend/model_server/main.py index 1aaf9567874..5c7979475c7 100644 --- a/backend/model_server/main.py +++ b/backend/model_server/main.py @@ -1,6 +1,8 @@ import os +import shutil from collections.abc import AsyncGenerator from contextlib import asynccontextmanager +from pathlib import Path import torch import uvicorn @@ -12,10 +14,7 @@ from model_server.custom_models import router as custom_models_router from model_server.custom_models import warm_up_intent_model from model_server.encoders import router as encoders_router -from model_server.encoders import warm_up_cross_encoders from model_server.management_endpoints import router as management_router -from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW -from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW from shared_configs.configs import INDEXING_ONLY from shared_configs.configs import MIN_THREADS_ML_MODELS from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST @@ -24,27 +23,56 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" +HF_CACHE_PATH = Path("/root/.cache/huggingface/") +TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/") + transformer_logging.set_verbosity_error() logger = setup_logger() +def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None: + """ + This moves the files from the temp huggingface cache to the huggingface cache + + We have to move each file individually because the directories might + have the same name but not the same contents and we dont want to remove + the files in the existing huggingface cache that don't exist in the temp + huggingface cache. 
+ """ + for item in source.iterdir(): + target_path = dest / item.relative_to(source) + if item.is_dir(): + _move_files_recursively(item, target_path, overwrite) + else: + target_path.parent.mkdir(parents=True, exist_ok=True) + if target_path.exists() and not overwrite: + continue + shutil.move(str(item), str(target_path)) + + @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator: if torch.cuda.is_available(): - logger.info("GPU is available") + logger.notice("CUDA GPU is available") + elif torch.backends.mps.is_available(): + logger.notice("Mac MPS is available") else: - logger.info("GPU is not available") + logger.notice("GPU is not available, using CPU") + + if TEMP_HF_CACHE_PATH.is_dir(): + logger.notice("Moving contents of temp_huggingface to huggingface cache.") + _move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH) + shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True) + logger.notice("Moved contents of temp_huggingface to huggingface cache.") torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads())) - logger.info(f"Torch Threads: {torch.get_num_threads()}") + logger.notice(f"Torch Threads: {torch.get_num_threads()}") if not INDEXING_ONLY: warm_up_intent_model() - if ENABLE_RERANKING_REAL_TIME_FLOW or ENABLE_RERANKING_ASYNC_FLOW: - warm_up_cross_encoders() else: - logger.info("This model server should only run document indexing.") + logger.notice("This model server should only run document indexing.") yield @@ -65,8 +93,8 @@ def get_model_app() -> FastAPI: if __name__ == "__main__": - logger.info( + logger.notice( f"Starting Danswer Model Server on http://{MODEL_SERVER_ALLOWED_HOST}:{str(MODEL_SERVER_PORT)}/" ) - logger.info(f"Model Server Version: {__version__}") + logger.notice(f"Model Server Version: {__version__}") uvicorn.run(app, host=MODEL_SERVER_ALLOWED_HOST, port=MODEL_SERVER_PORT) diff --git a/backend/model_server/management_endpoints.py b/backend/model_server/management_endpoints.py index fc1b8901e10..56640a2fa73 100644 --- a/backend/model_server/management_endpoints.py +++ b/backend/model_server/management_endpoints.py @@ -1,3 +1,4 @@ +import torch from fastapi import APIRouter from fastapi import Response @@ -7,3 +8,13 @@ @router.get("/health") def healthcheck() -> Response: return Response(status_code=200) + + +@router.get("/gpu-status") +def gpu_status() -> dict[str, bool | str]: + if torch.cuda.is_available(): + return {"gpu_available": True, "type": "cuda"} + elif torch.backends.mps.is_available(): + return {"gpu_available": True, "type": "mps"} + else: + return {"gpu_available": False, "type": "none"} diff --git a/backend/model_server/utils.py b/backend/model_server/utils.py index 3ebae26e5b6..0c2d6bac5dc 100644 --- a/backend/model_server/utils.py +++ b/backend/model_server/utils.py @@ -32,7 +32,7 @@ def wrapped_func(*args: Any, **kwargs: Any) -> Any: if debug_only: logger.debug(final_log) else: - logger.info(final_log) + logger.notice(final_log) return result diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index a5fe49da52f..37e603f9b46 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -31,20 +31,20 @@ langchain==0.1.17 langchain-community==0.0.36 langchain-core==0.1.50 langchain-text-splitters==0.0.1 -litellm==1.37.7 +litellm==1.43.18 llama-index==0.9.45 Mako==1.2.4 msal==1.26.0 nltk==3.8.1 Office365-REST-Python-Client==2.5.9 oauthlib==3.2.2 -openai==1.14.3 +openai==1.41.1 openpyxl==3.1.2 playwright==1.41.2 psutil==5.9.5 psycopg2-binary==2.9.9 
pycryptodome==3.19.1 -pydantic==1.10.13 +pydantic==2.8.2 PyGithub==1.58.2 python-dateutil==2.8.2 python-gitlab==3.9.0 @@ -66,7 +66,7 @@ slack-sdk==3.20.2 SQLAlchemy[mypy]==2.0.15 starlette==0.36.3 supervisor==4.2.5 -tiktoken==0.4.0 +tiktoken==0.7.0 timeago==1.0.16 transformers==4.39.2 uvicorn==0.21.1 @@ -74,4 +74,5 @@ zulip==0.8.2 hubspot-api-client==8.1.0 zenpy==2.0.41 dropbox==11.36.2 -boto3-stubs[s3]==1.34.133 \ No newline at end of file +boto3-stubs[s3]==1.34.133 +ultimate_sitemap_parser==0.5 diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt index 4ef8ffa5b6a..0fb0e74b67b 100644 --- a/backend/requirements/model_server.txt +++ b/backend/requirements/model_server.txt @@ -1,9 +1,14 @@ +cohere==5.6.1 +einops==0.8.0 fastapi==0.109.2 -h5py==3.9.0 -pydantic==1.10.13 +google-cloud-aiplatform==1.58.0 +numpy==1.26.4 +openai==1.41.1 +pydantic==2.8.2 +retry==0.9.2 safetensors==0.4.2 sentence-transformers==2.6.1 -tensorflow==2.15.0 torch==2.0.1 transformers==4.39.2 uvicorn==0.21.1 +voyageai==0.2.3 diff --git a/backend/scripts/api_inference_sample.py b/backend/scripts/api_inference_sample.py index b9325194b8d..9a93fdb73dd 100644 --- a/backend/scripts/api_inference_sample.py +++ b/backend/scripts/api_inference_sample.py @@ -33,6 +33,7 @@ def process_question(danswer_url: str, question: str, api_key: str | None) -> No "message": question, "chat_session_id": chat_session_id, "parent_message_id": None, + "file_descriptors": [], # Default Question Answer prompt "prompt_id": 0, # Not specifying any specific docs to chat to, we want to run a search diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py index adbb5d22090..3a917fbed1a 100644 --- a/backend/scripts/dev_run_background_jobs.py +++ b/backend/scripts/dev_run_background_jobs.py @@ -21,18 +21,17 @@ def run_jobs(exclude_indexing: bool) -> None: cmd_worker = [ "celery", "-A", - "ee.danswer.background.celery", + "ee.danswer.background.celery.celery_app", "worker", "--pool=threads", - "--autoscale=3,10", + "--concurrency=6", "--loglevel=INFO", - "--concurrency=1", ] cmd_beat = [ "celery", "-A", - "ee.danswer.background.celery", + "ee.danswer.background.celery.celery_app", "beat", "--loglevel=INFO", ] @@ -74,7 +73,7 @@ def run_jobs(exclude_indexing: bool) -> None: try: update_env = os.environ.copy() update_env["PYTHONPATH"] = "." 
- cmd_perm_sync = ["python", "ee.danswer/background/permission_sync.py"] + cmd_perm_sync = ["python", "ee/danswer/background/permission_sync.py"] indexing_process = subprocess.Popen( cmd_perm_sync, diff --git a/backend/scripts/force_delete_connector_by_id.py b/backend/scripts/force_delete_connector_by_id.py old mode 100644 new mode 100755 index a740bb06398..118a4dfa4b4 --- a/backend/scripts/force_delete_connector_by_id.py +++ b/backend/scripts/force_delete_connector_by_id.py @@ -5,6 +5,8 @@ from sqlalchemy import delete from sqlalchemy.orm import Session +from danswer.db.enums import ConnectorCredentialPairStatus + # Modify sys.path current_dir = os.path.dirname(os.path.abspath(__file__)) parent_dir = os.path.dirname(current_dir) @@ -14,12 +16,15 @@ # flake8: noqa: E402 # Now import Danswer modules -from danswer.db.models import DocumentSet__ConnectorCredentialPair +from danswer.db.models import ( + DocumentSet__ConnectorCredentialPair, + UserGroup__ConnectorCredentialPair, +) from danswer.db.connector import fetch_connector_by_id from danswer.db.document import get_documents_for_connector_credential_pair from danswer.db.index_attempt import ( delete_index_attempts, - cancel_indexing_attempts_for_connector, + cancel_indexing_attempts_for_ccpair, ) from danswer.db.models import ConnectorCredentialPair from danswer.document_index.interfaces import DocumentIndex @@ -44,7 +49,7 @@ _DELETION_BATCH_SIZE = 1000 -def unsafe_deletion( +def _unsafe_deletion( db_session: Session, document_index: DocumentIndex, cc_pair: ConnectorCredentialPair, @@ -82,11 +87,22 @@ def unsafe_deletion( credential_id=credential_id, ) - # Delete document sets + connector / credential Pairs + # Delete document sets stmt = delete(DocumentSet__ConnectorCredentialPair).where( DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == pair_id ) db_session.execute(stmt) + + # delete user group associations + stmt = delete(UserGroup__ConnectorCredentialPair).where( + UserGroup__ConnectorCredentialPair.cc_pair_id == pair_id + ) + db_session.execute(stmt) + + # need to flush to avoid foreign key violations + db_session.flush() + + # delete the actual connector credential pair stmt = delete(ConnectorCredentialPair).where( ConnectorCredentialPair.connector_id == connector_id, ConnectorCredentialPair.credential_id == credential_id, @@ -103,7 +119,7 @@ def unsafe_deletion( db_session.delete(connector) db_session.commit() - logger.info( + logger.notice( "Successfully deleted connector_credential_pair with connector_id:" f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs." ) @@ -117,20 +133,20 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None: Are you SURE you want to continue? (enter 'Y' to continue): " ) if user_input != "Y": - logger.info(f"You entered {user_input}. Exiting!") + logger.notice(f"You entered {user_input}. Exiting!") return - logger.info("Getting connector credential pair") + logger.notice("Getting connector credential pair") cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) if not cc_pair: logger.error(f"Connector credential pair with ID {cc_pair_id} not found") return - if not cc_pair.connector.disabled: + if cc_pair.status == ConnectorCredentialPairStatus.ACTIVE: logger.error( - f"Connector {cc_pair.connector.name} is not disabled, cannot continue. \ - Please navigate to the connector and disbale before attempting again" + f"Connector {cc_pair.connector.name} is active, cannot continue. 
\ + Please navigate to the connector and pause before attempting again" ) return @@ -144,9 +160,9 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None: ) return - logger.info("Cancelling indexing attempt for the connector") - cancel_indexing_attempts_for_connector( - connector_id=connector_id, db_session=db_session, include_secondary_index=True + logger.notice("Cancelling indexing attempt for the connector") + cancel_indexing_attempts_for_ccpair( + cc_pair_id=cc_pair_id, db_session=db_session, include_secondary_index=True ) validated_cc_pair = get_connector_credential_pair( @@ -161,30 +177,34 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None: f"{connector_id} and Credential ID: {credential_id} does not exist." ) + file_names: list[str] = ( + cc_pair.connector.connector_specific_config["file_locations"] + if cc_pair.connector.source == DocumentSource.FILE + else [] + ) try: - logger.info("Deleting information from Vespa and Postgres") + logger.notice("Deleting information from Vespa and Postgres") curr_ind_name, sec_ind_name = get_both_index_names(db_session) document_index = get_default_document_index( primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name ) - files_deleted_count = unsafe_deletion( + files_deleted_count = _unsafe_deletion( db_session=db_session, document_index=document_index, cc_pair=cc_pair, pair_id=cc_pair_id, ) - logger.info(f"Deleted {files_deleted_count} files!") + logger.notice(f"Deleted {files_deleted_count} files!") except Exception as e: logger.error(f"Failed to delete connector due to {e}") - if cc_pair.connector.source == DocumentSource.FILE: - connector = cc_pair.connector - logger.info("Deleting stored files!") + if file_names: + logger.notice("Deleting stored files!") file_store = get_default_file_store(db_session) - for file_name in connector.connector_specific_config["file_locations"]: - logger.info(f"Deleting file {file_name}") + for file_name in file_names: + logger.notice(f"Deleting file {file_name}") file_store.delete_file(file_name) diff --git a/backend/scripts/migrate_vespa_to_acl.py b/backend/scripts/migrate_vespa_to_acl.py deleted file mode 100644 index a0dce3361d7..00000000000 --- a/backend/scripts/migrate_vespa_to_acl.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Script which updates Vespa to align with the access described in Postgres. -Should be run when a user who has docs already indexed switches over to the new -access control system. This allows them to not have to re-index all documents. 
-NOTE: this is auto-run on server startup, so should not be necessary in most cases.""" -from danswer.utils.acl import set_acl_for_vespa - - -if __name__ == "__main__": - set_acl_for_vespa() diff --git a/backend/scripts/save_load_state.py b/backend/scripts/save_load_state.py index 19d6fa66e25..94431e8c813 100644 --- a/backend/scripts/save_load_state.py +++ b/backend/scripts/save_load_state.py @@ -21,7 +21,7 @@ def save_postgres(filename: str, container_name: str) -> None: - logger.info("Attempting to take Postgres snapshot") + logger.notice("Attempting to take Postgres snapshot") cmd = f"docker exec {container_name} pg_dump -U {POSTGRES_USER} -h {POSTGRES_HOST} -p {POSTGRES_PORT} -W -F t {POSTGRES_DB}" with open(filename, "w") as file: subprocess.run( @@ -35,7 +35,7 @@ def save_postgres(filename: str, container_name: str) -> None: def load_postgres(filename: str, container_name: str) -> None: - logger.info("Attempting to load Postgres snapshot") + logger.notice("Attempting to load Postgres snapshot") try: alembic_cfg = Config("alembic.ini") command.upgrade(alembic_cfg, "head") @@ -57,7 +57,7 @@ def load_postgres(filename: str, container_name: str) -> None: def save_vespa(filename: str) -> None: - logger.info("Attempting to take Vespa snapshot") + logger.notice("Attempting to take Vespa snapshot") continuation = "" params = {} doc_jsons: list[dict] = [] diff --git a/backend/scripts/test-openapi-key.py b/backend/scripts/test-openapi-key.py index ba2e61dceaa..8b12279ba1c 100644 --- a/backend/scripts/test-openapi-key.py +++ b/backend/scripts/test-openapi-key.py @@ -2,6 +2,8 @@ VALID_MODEL_LIST = [ + "gpt-4o-mini", + "gpt-4o", "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4", diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py index 5eaff4aa1a2..5ad36cc93c4 100644 --- a/backend/shared_configs/configs.py +++ b/backend/shared_configs/configs.py @@ -1,5 +1,7 @@ import os +# Used for logging +SLACK_CHANNEL_ID = "channel_id" MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost" MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0" @@ -14,22 +16,27 @@ ) # Danswer custom Deep Learning Models -INTENT_MODEL_VERSION = "danswer/intent-model" -INTENT_MODEL_CONTEXT_SIZE = 256 +INTENT_MODEL_VERSION = "danswer/hybrid-intent-token-classifier" +INTENT_MODEL_TAG = "v1.0.3" # Bi-Encoder, other details DOC_EMBEDDING_CONTEXT_SIZE = 512 -# Cross Encoder Settings -ENABLE_RERANKING_ASYNC_FLOW = ( - os.environ.get("ENABLE_RERANKING_ASYNC_FLOW", "").lower() == "true" +# Used to distinguish alternative indices +ALT_INDEX_SUFFIX = "__danswer_alt_index" + +# Used for loading defaults for automatic deployments and dev flows +# For local, use: mixedbread-ai/mxbai-rerank-xsmall-v1 +DEFAULT_CROSS_ENCODER_MODEL_NAME = ( + os.environ.get("DEFAULT_CROSS_ENCODER_MODEL_NAME") or None +) +DEFAULT_CROSS_ENCODER_API_KEY = os.environ.get("DEFAULT_CROSS_ENCODER_API_KEY") or None +DEFAULT_CROSS_ENCODER_PROVIDER_TYPE = ( + os.environ.get("DEFAULT_CROSS_ENCODER_PROVIDER_TYPE") or None ) -ENABLE_RERANKING_REAL_TIME_FLOW = ( - os.environ.get("ENABLE_RERANKING_REAL_TIME_FLOW", "").lower() == "true" +DISABLE_RERANK_FOR_STREAMING = ( + os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true" ) -# Only using one cross-encoder for now -CROSS_ENCODER_MODEL_ENSEMBLE = ["mixedbread-ai/mxbai-rerank-large-v1"] -CROSS_EMBED_CONTEXT_SIZE = 512 # This controls the minimum number of pytorch "threads" to allocate to the embedding # model. 
If torch finds more threads on its own, this value is not used. @@ -39,5 +46,25 @@ # or intent classification INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true" -# notset, debug, info, warning, error, or critical -LOG_LEVEL = os.environ.get("LOG_LEVEL", "info") +# The process needs to have this for the log file to write to +# otherwise, it will not create additional log files +LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer" + +# Enable generating persistent log files for local dev environments +DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true" +# notset, debug, info, notice, warning, error, or critical +LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice") + + +# Fields which should only be set on new search setting +PRESERVED_SEARCH_FIELDS = [ + "provider_type", + "api_key", + "model_name", + "index_name", + "multipass_indexing", + "model_dim", + "normalize", + "passage_prefix", + "query_prefix", +] diff --git a/backend/shared_configs/enums.py b/backend/shared_configs/enums.py new file mode 100644 index 00000000000..918872d44b3 --- /dev/null +++ b/backend/shared_configs/enums.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class EmbeddingProvider(str, Enum): + OPENAI = "openai" + COHERE = "cohere" + VOYAGE = "voyage" + GOOGLE = "google" + + +class RerankerProvider(str, Enum): + COHERE = "cohere" + + +class EmbedTextType(str, Enum): + QUERY = "query" + PASSAGE = "passage" diff --git a/backend/shared_configs/model_server_models.py b/backend/shared_configs/model_server_models.py index 020a24a30b3..3014616c620 100644 --- a/backend/shared_configs/model_server_models.py +++ b/backend/shared_configs/model_server_models.py @@ -1,30 +1,55 @@ from pydantic import BaseModel +from shared_configs.enums import EmbeddingProvider +from shared_configs.enums import EmbedTextType +from shared_configs.enums import RerankerProvider + +Embedding = list[float] + class EmbedRequest(BaseModel): - # This already includes any prefixes, the text is just passed directly to the model texts: list[str] - model_name: str + # Can be none for cloud embedding model requests, error handling logic exists for other cases + model_name: str | None = None max_context_length: int normalize_embeddings: bool + api_key: str | None = None + provider_type: EmbeddingProvider | None = None + text_type: EmbedTextType + manual_query_prefix: str | None = None + manual_passage_prefix: str | None = None + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} class EmbedResponse(BaseModel): - embeddings: list[list[float]] + embeddings: list[Embedding] class RerankRequest(BaseModel): query: str documents: list[str] + model_name: str + provider_type: RerankerProvider | None = None + api_key: str | None = None + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} class RerankResponse(BaseModel): - scores: list[list[float]] + scores: list[float] class IntentRequest(BaseModel): query: str + # Sequence classification threshold + semantic_percent_threshold: float + # Token classification threshold + keyword_percent_threshold: float class IntentResponse(BaseModel): - class_probs: list[float] + is_keyword: bool + keywords: list[str] diff --git a/backend/shared_configs/utils.py b/backend/shared_configs/utils.py new file mode 100644 index 00000000000..c40795eb4aa --- /dev/null +++ b/backend/shared_configs/utils.py @@ -0,0 +1,11 @@ +from typing import TypeVar + + +T = TypeVar("T") + + 
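+# Splits a list into consecutive batches of at most batch_size items (the last batch may be shorter).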
+def batch_list( + lst: list[T], + batch_size: int, +) -> list[list[T]]: + return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] diff --git a/backend/supervisord.conf b/backend/supervisord.conf index 7f6376939a5..b56c763b94f 100644 --- a/backend/supervisord.conf +++ b/backend/supervisord.conf @@ -1,15 +1,14 @@ [supervisord] nodaemon=true +user=root logfile=/var/log/supervisord.log # Indexing is the heaviest job, also requires some CPU intensive steps # Cannot place this in Celery for now because Celery must run as a single process (see note below) # Indexing uses multi-processing to speed things up [program:document_indexing] -environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true +environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing command=python danswer/background/update.py -stdout_logfile=/var/log/update.log -stdout_logfile_maxbytes=52428800 redirect_stderr=true autorestart=true @@ -25,17 +24,15 @@ autorestart=true # relatively compute-light (e.g. they tend to just make a bunch of requests to # Vespa / Postgres) [program:celery_worker] -command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --autoscale=3,10 --loglevel=INFO --logfile=/var/log/celery_worker.log -stdout_logfile=/var/log/celery_worker_supervisor.log -stdout_logfile_maxbytes=52428800 +command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker_supervisor.log +environment=LOG_FILE_NAME=celery_worker redirect_stderr=true autorestart=true # Job scheduler for periodic tasks [program:celery_beat] -command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log -stdout_logfile=/var/log/celery_beat_supervisor.log -stdout_logfile_maxbytes=52428800 +command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat_supervisor.log +environment=LOG_FILE_NAME=celery_beat redirect_stderr=true autorestart=true @@ -43,20 +40,24 @@ autorestart=true # for all channels that the DanswerBot has been added to. # If not setup, this will just fail 5 times and then stop. 
# More details on setup here: https://docs.danswer.dev/slack_bot_setup -[program:slack_bot_listener] +[program:slack_bot] command=python danswer/danswerbot/slack/listener.py -stdout_logfile=/var/log/slack_bot_listener.log -stdout_logfile_maxbytes=52428800 +environment=LOG_FILE_NAME=slack_bot redirect_stderr=true autorestart=true startretries=5 startsecs=60 # Pushes all logs from the above programs to stdout -# No log rotation here, since it's stdout it's handled by the Docker container loglevel -# To be standard across all the services +# No log rotation here, since it's stdout it's handled by the Docker container logging [program:log-redirect-handler] -command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log +command=tail -qF + /var/log/document_indexing_info.log + /var/log/celery_beat_supervisor.log + /var/log/celery_worker_supervisor.log + /var/log/celery_beat_debug.log + /var/log/celery_worker_debug.log + /var/log/slack_bot_debug.log stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 redirect_stderr=true diff --git a/backend/tests/api/test_api.py b/backend/tests/api/test_api.py new file mode 100644 index 00000000000..059c40824d5 --- /dev/null +++ b/backend/tests/api/test_api.py @@ -0,0 +1,104 @@ +import os +from collections.abc import Generator +from typing import Any + +import pytest +from fastapi.testclient import TestClient + +from danswer.main import fetch_versioned_implementation +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +@pytest.fixture(scope="function") +def client() -> Generator[TestClient, Any, None]: + # Set environment variables + os.environ["ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"] = "True" + + # Initialize TestClient with the FastAPI app + app = fetch_versioned_implementation( + module="danswer.main", attribute="get_application" + )() + client = TestClient(app) + yield client + + +@pytest.mark.skip( + reason="enable when we have a testing environment with preloaded data" +) +def test_handle_simplified_chat_message(client: TestClient) -> None: + req: dict[str, Any] = {} + + req["persona_id"] = 0 + req["description"] = "pytest" + response = client.post("/chat/create-chat-session", json=req) + chat_session_id = response.json()["chat_session_id"] + + req = {} + req["chat_session_id"] = chat_session_id + req["message"] = "hello" + + response = client.post("/chat/send-message-simple-api", json=req) + assert response.status_code == 200 + + +@pytest.mark.skip( + reason="enable when we have a testing environment with preloaded data" +) +def test_handle_send_message_simple_with_history(client: TestClient) -> None: + req: dict[str, Any] = {} + messages = [] + messages.append({"message": "What sorts of questions can you answer for me?"}) + # messages.append({"message": + # "I'd be happy to assist you with a wide range of questions related to Ramp's expense management platform. " + # "I can help with topics such as:\n\n" + # "1. Setting up and managing your Ramp account\n" + # "2. Using Ramp cards and making purchases\n" + # "3. Submitting and reviewing expenses\n" + # "4. Understanding Ramp's features and benefits\n" + # "5. Navigating the Ramp dashboard and mobile app\n" + # "6. Managing team spending and budgets\n" + # "7. Integrating Ramp with accounting software\n" + # "8. 
Troubleshooting common issues\n\n" + # "Feel free to ask any specific questions you have about using Ramp, " + # "and I'll do my best to provide clear and helpful answers. " + # "Is there a particular area you'd like to know more about?", + # "role": "assistant"}) + # req["prompt_id"] = 9 + # req["persona_id"] = 6 + + # Yoda + req["persona_id"] = 1 + req["prompt_id"] = 4 + messages.append( + { + "message": "Answer questions for you, I can. " + "About many topics, knowledge I have. " + "But specific to documents provided, limited my responses are. " + "Ask you may about:\n\n" + "- User interviews and building trust with participants\n" + "- Designing effective surveys and survey questions \n" + "- Product analysis approaches\n" + "- Recruiting participants for research\n" + "- Discussion guides for user interviews\n" + "- Types of survey questions\n\n" + "More there may be, but focus on these areas, the given context does. " + "Specific questions you have, ask you should. Guide you I will, as best I can.", + "role": "assistant", + } + ) + # messages.append({"message": "Where can I pilot a survey?"}) + + # messages.append({"message": "How many data points should I collect to validate my solution?"}) + messages.append({"message": "What is solution validation research used for?"}) + + req["messages"] = messages + + response = client.post("/chat/send-message-simple-with-history", json=req) + assert response.status_code == 200 + + resp_json = response.json() + + # persona must have LLM relevance enabled for this to pass + assert len(resp_json["llm_chunks_indices"]) > 0 diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py new file mode 100644 index 00000000000..7f05242c50b --- /dev/null +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -0,0 +1,42 @@ +import os +import time + +import pytest + +from danswer.connectors.confluence.connector import ConfluenceConnector + + +@pytest.fixture +def confluence_connector() -> ConfluenceConnector: + connector = ConfluenceConnector(os.environ["CONFLUENCE_TEST_SPACE_URL"]) + connector.load_credentials( + { + "confluence_username": os.environ["CONFLUENCE_USER_NAME"], + "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"], + } + ) + return connector + + +def test_confluence_connector_basic(confluence_connector: ConfluenceConnector) -> None: + doc_batch_generator = confluence_connector.poll_source(0, time.time()) + + doc_batch = next(doc_batch_generator) + with pytest.raises(StopIteration): + next(doc_batch_generator) + + assert len(doc_batch) == 1 + + doc = doc_batch[0] + assert doc.semantic_identifier == "DailyConnectorTestSpace Home" + assert doc.metadata["labels"] == ["testlabel"] + assert doc.primary_owners + assert doc.primary_owners[0].email == "chris@danswer.ai" + assert len(doc.sections) == 1 + + section = doc.sections[0] + assert section.text == "test123small" + assert ( + section.link + == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview" + ) diff --git a/backend/tests/daily/embedding/test_embeddings.py b/backend/tests/daily/embedding/test_embeddings.py new file mode 100644 index 00000000000..a9c12b236cf --- /dev/null +++ b/backend/tests/daily/embedding/test_embeddings.py @@ -0,0 +1,78 @@ +import os + +import pytest + +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from shared_configs.enums import EmbedTextType +from shared_configs.model_server_models import 
EmbeddingProvider + +VALID_SAMPLE = ["hi", "hello my name is bob", "woah there!!!. 😃"] +# openai limit is 2048, cohere is supposed to be 96 but in practice that doesn't +# seem to be true +TOO_LONG_SAMPLE = ["a"] * 2500 + + +def _run_embeddings( + texts: list[str], embedding_model: EmbeddingModel, expected_dim: int +) -> None: + for text_type in [EmbedTextType.QUERY, EmbedTextType.PASSAGE]: + embeddings = embedding_model.encode(texts, text_type) + assert len(embeddings) == len(texts) + assert len(embeddings[0]) == expected_dim + + +@pytest.fixture +def openai_embedding_model() -> EmbeddingModel: + return EmbeddingModel( + server_host="localhost", + server_port=9000, + model_name="text-embedding-3-small", + normalize=True, + query_prefix=None, + passage_prefix=None, + api_key=os.getenv("OPENAI_API_KEY"), + provider_type=EmbeddingProvider.OPENAI, + ) + + +def test_openai_embedding(openai_embedding_model: EmbeddingModel) -> None: + _run_embeddings(VALID_SAMPLE, openai_embedding_model, 1536) + _run_embeddings(TOO_LONG_SAMPLE, openai_embedding_model, 1536) + + +@pytest.fixture +def cohere_embedding_model() -> EmbeddingModel: + return EmbeddingModel( + server_host="localhost", + server_port=9000, + model_name="embed-english-light-v3.0", + normalize=True, + query_prefix=None, + passage_prefix=None, + api_key=os.getenv("COHERE_API_KEY"), + provider_type=EmbeddingProvider.COHERE, + ) + + +def test_cohere_embedding(cohere_embedding_model: EmbeddingModel) -> None: + _run_embeddings(VALID_SAMPLE, cohere_embedding_model, 384) + _run_embeddings(TOO_LONG_SAMPLE, cohere_embedding_model, 384) + + +@pytest.fixture +def local_nomic_embedding_model() -> EmbeddingModel: + return EmbeddingModel( + server_host="localhost", + server_port=9000, + model_name="nomic-ai/nomic-embed-text-v1", + normalize=True, + query_prefix="search_query: ", + passage_prefix="search_document: ", + api_key=None, + provider_type=None, + ) + + +def test_local_nomic_embedding(local_nomic_embedding_model: EmbeddingModel) -> None: + _run_embeddings(VALID_SAMPLE, local_nomic_embedding_model, 768) + _run_embeddings(TOO_LONG_SAMPLE, local_nomic_embedding_model, 768) diff --git a/backend/tests/integration/Dockerfile b/backend/tests/integration/Dockerfile new file mode 100644 index 00000000000..d4869dd76c2 --- /dev/null +++ b/backend/tests/integration/Dockerfile @@ -0,0 +1,83 @@ +FROM python:3.11.7-slim-bookworm +# Dockerfile for integration tests +# Currently needs all dependencies, since the ITs use some of the Danswer +# backend code. 
+ +# Install system dependencies +# cmake needed for psycopg (postgres) +# libpq-dev needed for psycopg (postgres) +# curl included just for users' convenience +# zip for Vespa step further down +# ca-certificates for HTTPS +RUN apt-get update && \ + apt-get install -y \ + cmake \ + curl \ + zip \ + ca-certificates \ + libgnutls30=3.7.9-2+deb12u3 \ + libblkid1=2.38.1-5+deb12u1 \ + libmount1=2.38.1-5+deb12u1 \ + libsmartcols1=2.38.1-5+deb12u1 \ + libuuid1=2.38.1-5+deb12u1 \ + libxmlsec1-dev \ + pkg-config \ + gcc && \ + rm -rf /var/lib/apt/lists/* && \ + apt-get clean + +# Install Python dependencies +# Remove py, which is pulled in by retry; py is not needed and is a CVE +COPY ./requirements/default.txt /tmp/requirements.txt +COPY ./requirements/ee.txt /tmp/ee-requirements.txt +RUN pip install --no-cache-dir --upgrade \ + -r /tmp/requirements.txt \ + -r /tmp/ee-requirements.txt && \ + pip uninstall -y py && \ + playwright install chromium && \ + playwright install-deps chromium && \ + ln -s /usr/local/bin/supervisord /usr/bin/supervisord + +# Cleanup for CVEs and size reduction +# https://github.com/tornadoweb/tornado/issues/3107 +# xserver-common and xvfb included by playwright installation but not needed after +# perl-base is part of the base Python Debian image but not needed for Danswer functionality +# perl-base could only be removed with --allow-remove-essential +RUN apt-get update && \ + apt-get remove -y --allow-remove-essential \ + perl-base \ + xserver-common \ + xvfb \ + cmake \ + libldap-2.5-0 \ + libxmlsec1-dev \ + pkg-config \ + gcc && \ + apt-get install -y libxmlsec1-openssl && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* && \ + rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key + +# Set up application files +WORKDIR /app + +# Enterprise Version Files +COPY ./ee /app/ee +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# Set up application files +COPY ./danswer /app/danswer +COPY ./shared_configs /app/shared_configs +COPY ./alembic /app/alembic +COPY ./alembic.ini /app/alembic.ini +COPY supervisord.conf /usr/etc/supervisord.conf + +# Integration test stuff +COPY ./requirements/dev.txt /tmp/dev-requirements.txt +RUN pip install --no-cache-dir --upgrade \ + -r /tmp/dev-requirements.txt +COPY ./tests/integration /app/tests/integration + +ENV PYTHONPATH /app + +CMD ["pytest", "-s", "/app/tests/integration"] diff --git a/backend/tests/integration/common_utils/chat.py b/backend/tests/integration/common_utils/chat.py new file mode 100644 index 00000000000..cd33d4edcaf --- /dev/null +++ b/backend/tests/integration/common_utils/chat.py @@ -0,0 +1,66 @@ +import requests +from sqlalchemy.orm import Session + +from danswer.db.models import User + + +def test_create_chat_session_and_send_messages(db_session: Session) -> None: + # Create a test user + test_user = User(email="test@example.com", hashed_password="dummy_hash") + db_session.add(test_user) + db_session.commit() + + base_url = "http://localhost:8080" # Adjust this to your API's base URL + headers = {"Authorization": f"Bearer {test_user.id}"} + + # Create a new chat session + create_session_response = requests.post( + f"{base_url}/chat/create-chat-session", + json={ + "description": "Test Chat", + "persona_id": 1, + }, # Assuming persona_id 1 exists + headers=headers, + ) + assert create_session_response.status_code == 200 + chat_session_id = create_session_response.json()["chat_session_id"] + + # Send first message + first_message = "Hello, this is a test message."
+ send_message_response = requests.post( + f"{base_url}/chat/send-message", + json={ + "chat_session_id": chat_session_id, + "message": first_message, + "prompt_id": None, + "retrieval_options": {"top_k": 3}, + "stream_response": False, + }, + headers=headers, + ) + assert send_message_response.status_code == 200 + + # Send second message + second_message = "Can you provide more information?" + send_message_response = requests.post( + f"{base_url}/chat/send-message", + json={ + "chat_session_id": chat_session_id, + "message": second_message, + "prompt_id": None, + "retrieval_options": {"top_k": 3}, + "stream_response": False, + }, + headers=headers, + ) + assert send_message_response.status_code == 200 + + # Verify chat session details + get_session_response = requests.get( + f"{base_url}/chat/get-chat-session/{chat_session_id}", headers=headers + ) + assert get_session_response.status_code == 200 + session_details = get_session_response.json() + assert session_details["chat_session_id"] == chat_session_id + assert session_details["description"] == "Test Chat" + assert len(session_details["messages"]) == 4 # 2 user messages + 2 AI responses diff --git a/backend/tests/integration/common_utils/connectors.py b/backend/tests/integration/common_utils/connectors.py new file mode 100644 index 00000000000..e7734cec3c8 --- /dev/null +++ b/backend/tests/integration/common_utils/connectors.py @@ -0,0 +1,114 @@ +import uuid +from typing import cast + +import requests +from pydantic import BaseModel + +from danswer.configs.constants import DocumentSource +from danswer.db.enums import ConnectorCredentialPairStatus +from tests.integration.common_utils.constants import API_SERVER_URL + + +class ConnectorCreationDetails(BaseModel): + connector_id: int + credential_id: int + cc_pair_id: int + + +class ConnectorClient: + @staticmethod + def create_connector( + name_prefix: str = "test_connector", credential_id: int | None = None + ) -> ConnectorCreationDetails: + unique_id = uuid.uuid4() + + connector_name = f"{name_prefix}_{unique_id}" + connector_data = { + "name": connector_name, + "source": DocumentSource.NOT_APPLICABLE, + "input_type": "load_state", + "connector_specific_config": {}, + "refresh_freq": 60, + "disabled": True, + } + response = requests.post( + f"{API_SERVER_URL}/manage/admin/connector", + json=connector_data, + ) + response.raise_for_status() + connector_id = response.json()["id"] + + # associate the credential with the connector + if not credential_id: + print("ID not specified, creating new credential") + # Create a new credential + credential_data = { + "credential_json": {}, + "admin_public": True, + "source": DocumentSource.NOT_APPLICABLE, + } + response = requests.post( + f"{API_SERVER_URL}/manage/credential", + json=credential_data, + ) + response.raise_for_status() + credential_id = cast(int, response.json()["id"]) + + cc_pair_metadata = {"name": f"test_cc_pair_{unique_id}", "is_public": True} + response = requests.put( + f"{API_SERVER_URL}/manage/connector/{connector_id}/credential/{credential_id}", + json=cc_pair_metadata, + ) + response.raise_for_status() + + # fetch the connector credential pair id using the indexing status API + response = requests.get( + f"{API_SERVER_URL}/manage/admin/connector/indexing-status" + ) + response.raise_for_status() + indexing_statuses = response.json() + + cc_pair_id = None + for status in indexing_statuses: + if ( + status["connector"]["id"] == connector_id + and status["credential"]["id"] == credential_id + ): + cc_pair_id =
status["cc_pair_id"] + break + + if cc_pair_id is None: + raise ValueError("Could not find the connector credential pair id") + + print( + f"Created connector with connector_id: {connector_id}, credential_id: {credential_id}, cc_pair_id: {cc_pair_id}" + ) + return ConnectorCreationDetails( + connector_id=int(connector_id), + credential_id=int(credential_id), + cc_pair_id=int(cc_pair_id), + ) + + @staticmethod + def update_connector_status( + cc_pair_id: int, status: ConnectorCredentialPairStatus + ) -> None: + response = requests.put( + f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/status", + json={"status": status}, + ) + response.raise_for_status() + + @staticmethod + def delete_connector(connector_id: int, credential_id: int) -> None: + response = requests.post( + f"{API_SERVER_URL}/manage/admin/deletion-attempt", + json={"connector_id": connector_id, "credential_id": credential_id}, + ) + response.raise_for_status() + + @staticmethod + def get_connectors() -> list[dict]: + response = requests.get(f"{API_SERVER_URL}/manage/connector") + response.raise_for_status() + return response.json() diff --git a/backend/tests/integration/common_utils/constants.py b/backend/tests/integration/common_utils/constants.py new file mode 100644 index 00000000000..efc98dde7de --- /dev/null +++ b/backend/tests/integration/common_utils/constants.py @@ -0,0 +1,7 @@ +import os + +API_SERVER_PROTOCOL = os.getenv("API_SERVER_PROTOCOL") or "http" +API_SERVER_HOST = os.getenv("API_SERVER_HOST") or "localhost" +API_SERVER_PORT = os.getenv("API_SERVER_PORT") or "8080" +API_SERVER_URL = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}" +MAX_DELAY = 30 diff --git a/backend/tests/integration/common_utils/document_sets.py b/backend/tests/integration/common_utils/document_sets.py new file mode 100644 index 00000000000..dc898611108 --- /dev/null +++ b/backend/tests/integration/common_utils/document_sets.py @@ -0,0 +1,30 @@ +from typing import cast + +import requests + +from danswer.server.features.document_set.models import DocumentSet +from danswer.server.features.document_set.models import DocumentSetCreationRequest +from tests.integration.common_utils.constants import API_SERVER_URL + + +class DocumentSetClient: + @staticmethod + def create_document_set( + doc_set_creation_request: DocumentSetCreationRequest, + ) -> int: + response = requests.post( + f"{API_SERVER_URL}/manage/admin/document-set", + json=doc_set_creation_request.model_dump(), + ) + response.raise_for_status() + return cast(int, response.json()) + + @staticmethod + def fetch_document_sets() -> list[DocumentSet]: + response = requests.get(f"{API_SERVER_URL}/manage/document-set") + response.raise_for_status() + + document_sets = [ + DocumentSet.parse_obj(doc_set_data) for doc_set_data in response.json() + ] + return document_sets diff --git a/backend/tests/integration/common_utils/llm.py b/backend/tests/integration/common_utils/llm.py new file mode 100644 index 00000000000..ba8b89d6b4d --- /dev/null +++ b/backend/tests/integration/common_utils/llm.py @@ -0,0 +1,62 @@ +import os +from typing import cast + +import requests +from pydantic import BaseModel +from pydantic import PrivateAttr + +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from tests.integration.common_utils.constants import API_SERVER_URL + + +class LLMProvider(BaseModel): + provider: str + api_key: str + default_model_name: str + api_base: str | None = None + api_version: str | None = None + is_default: bool = True + + # only populated after 
creation + _provider_id: int | None = PrivateAttr() + + def create(self) -> int: + llm_provider = LLMProviderUpsertRequest( + name=self.provider, + provider=self.provider, + default_model_name=self.default_model_name, + api_key=self.api_key, + api_base=self.api_base, + api_version=self.api_version, + custom_config=None, + fast_default_model_name=None, + is_public=True, + groups=[], + display_model_names=None, + model_names=None, + ) + + response = requests.put( + f"{API_SERVER_URL}/admin/llm/provider", + json=llm_provider.dict(), + ) + response.raise_for_status() + + self._provider_id = cast(int, response.json()["id"]) + return self._provider_id + + def delete(self) -> None: + response = requests.delete( + f"{API_SERVER_URL}/admin/llm/provider/{self._provider_id}" + ) + response.raise_for_status() + + +def seed_default_openai_provider() -> LLMProvider: + llm = LLMProvider( + provider="openai", + default_model_name="gpt-4o-mini", + api_key=os.environ["OPENAI_API_KEY"], + ) + llm.create() + return llm diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py new file mode 100644 index 00000000000..3815aa9f972 --- /dev/null +++ b/backend/tests/integration/common_utils/reset.py @@ -0,0 +1,172 @@ +import logging +import time + +import psycopg2 +import requests + +from alembic import command +from alembic.config import Config +from danswer.configs.app_configs import POSTGRES_HOST +from danswer.configs.app_configs import POSTGRES_PASSWORD +from danswer.configs.app_configs import POSTGRES_PORT +from danswer.configs.app_configs import POSTGRES_USER +from danswer.db.engine import build_connection_string +from danswer.db.engine import get_session_context_manager +from danswer.db.engine import SYNC_DB_API +from danswer.db.search_settings import get_current_search_settings +from danswer.db.swap_index import check_index_swap +from danswer.document_index.vespa.index import DOCUMENT_ID_ENDPOINT +from danswer.document_index.vespa.index import VespaIndex +from danswer.indexing.models import IndexingSetting +from danswer.main import setup_postgres +from danswer.main import setup_vespa +from tests.integration.common_utils.llm import seed_default_openai_provider + + +def _run_migrations( + database_url: str, direction: str = "upgrade", revision: str = "head" +) -> None: + # hide info logs emitted during migration + logging.getLogger("alembic").setLevel(logging.CRITICAL) + + # Create an Alembic configuration object + alembic_cfg = Config("alembic.ini") + alembic_cfg.set_section_option("logger_alembic", "level", "WARN") + + # Set the SQLAlchemy URL in the Alembic configuration + alembic_cfg.set_main_option("sqlalchemy.url", database_url) + + # Run the migration + if direction == "upgrade": + command.upgrade(alembic_cfg, revision) + elif direction == "downgrade": + command.downgrade(alembic_cfg, revision) + else: + raise ValueError( + f"Invalid direction: {direction}. Must be 'upgrade' or 'downgrade'." 
+ ) + + logging.getLogger("alembic").setLevel(logging.INFO) + + +def reset_postgres(database: str = "postgres") -> None: + """Reset the Postgres database.""" + + # NOTE: need to delete all rows to allow migrations to be rolled back + # as there are a few downgrades that don't properly handle data in tables + conn = psycopg2.connect( + dbname=database, + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + host=POSTGRES_HOST, + port=POSTGRES_PORT, + ) + cur = conn.cursor() + + # Disable triggers to prevent foreign key constraints from being checked + cur.execute("SET session_replication_role = 'replica';") + + # Fetch all table names in the current database + cur.execute( + """ + SELECT tablename + FROM pg_tables + WHERE schemaname = 'public' + """ + ) + + tables = cur.fetchall() + + for table in tables: + table_name = table[0] + + # Don't touch migration history + if table_name == "alembic_version": + continue + + # Don't touch Kombu + if table_name == "kombu_message" or table_name == "kombu_queue": + continue + + cur.execute(f'DELETE FROM "{table_name}"') + + # Re-enable triggers + cur.execute("SET session_replication_role = 'origin';") + + conn.commit() + cur.close() + conn.close() + + # downgrade to base + upgrade back to head + conn_str = build_connection_string( + db=database, + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + host=POSTGRES_HOST, + port=POSTGRES_PORT, + db_api=SYNC_DB_API, + ) + _run_migrations( + conn_str, + direction="downgrade", + revision="base", + ) + _run_migrations( + conn_str, + direction="upgrade", + revision="head", + ) + + # do the same thing as we do on API server startup + with get_session_context_manager() as db_session: + setup_postgres(db_session) + + +def reset_vespa() -> None: + """Wipe all data from the Vespa index.""" + with get_session_context_manager() as db_session: + # swap to the correct default model + check_index_swap(db_session) + + search_settings = get_current_search_settings(db_session) + index_name = search_settings.index_name + + setup_vespa( + document_index=VespaIndex(index_name=index_name, secondary_index_name=None), + index_setting=IndexingSetting.from_db_model(search_settings), + secondary_index_setting=None, + ) + + for _ in range(5): + try: + continuation = None + should_continue = True + while should_continue: + params = {"selection": "true", "cluster": "danswer_index"} + if continuation: + params = {**params, "continuation": continuation} + response = requests.delete( + DOCUMENT_ID_ENDPOINT.format(index_name=index_name), params=params + ) + response.raise_for_status() + + response_json = response.json() + + continuation = response_json.get("continuation") + should_continue = bool(continuation) + + break + except Exception as e: + print(f"Error deleting documents: {e}") + time.sleep(5) + + +def reset_all() -> None: + """Reset both Postgres and Vespa.""" + print("Resetting Postgres...") + reset_postgres() + print("Resetting Vespa...") + reset_vespa() + print("Seeding LLM Providers...") + seed_default_openai_provider() + print("Finished resetting all.") diff --git a/backend/tests/integration/common_utils/seed_documents.py b/backend/tests/integration/common_utils/seed_documents.py new file mode 100644 index 00000000000..b6720c9aebe --- /dev/null +++ b/backend/tests/integration/common_utils/seed_documents.py @@ -0,0 +1,72 @@ +import uuid + +import requests +from pydantic import BaseModel + +from danswer.configs.constants import DocumentSource +from tests.integration.common_utils.connectors import ConnectorClient +from 
tests.integration.common_utils.constants import API_SERVER_URL + + +class SimpleTestDocument(BaseModel): + id: str + content: str + + +class SeedDocumentResponse(BaseModel): + cc_pair_id: int + documents: list[SimpleTestDocument] + + +class TestDocumentClient: + @staticmethod + def seed_documents( + num_docs: int = 5, cc_pair_id: int | None = None + ) -> SeedDocumentResponse: + if not cc_pair_id: + connector_details = ConnectorClient.create_connector() + cc_pair_id = connector_details.cc_pair_id + + # Create and ingest some documents + documents: list[dict] = [] + for _ in range(num_docs): + document_id = f"test-doc-{uuid.uuid4()}" + document = { + "document": { + "id": document_id, + "sections": [ + { + "text": f"This is test document {document_id}", + "link": f"{document_id}", + } + ], + "source": DocumentSource.NOT_APPLICABLE, + # just for testing metadata + "metadata": {"document_id": document_id}, + "semantic_identifier": f"Test Document {document_id}", + "from_ingestion_api": True, + }, + "cc_pair_id": cc_pair_id, + } + documents.append(document) + response = requests.post( + f"{API_SERVER_URL}/danswer-api/ingestion", + json=document, + ) + response.raise_for_status() + + print("Seeding completed successfully.") + return SeedDocumentResponse( + cc_pair_id=cc_pair_id, + documents=[ + SimpleTestDocument( + id=document["document"]["id"], + content=document["document"]["sections"][0]["text"], + ) + for document in documents + ], + ) + + +if __name__ == "__main__": + seed_documents_resp = TestDocumentClient.seed_documents() diff --git a/backend/tests/integration/common_utils/user_groups.py b/backend/tests/integration/common_utils/user_groups.py new file mode 100644 index 00000000000..0cd44066463 --- /dev/null +++ b/backend/tests/integration/common_utils/user_groups.py @@ -0,0 +1,24 @@ +from typing import cast + +import requests + +from ee.danswer.server.user_group.models import UserGroup +from ee.danswer.server.user_group.models import UserGroupCreate +from tests.integration.common_utils.constants import API_SERVER_URL + + +class UserGroupClient: + @staticmethod + def create_user_group(user_group_creation_request: UserGroupCreate) -> int: + response = requests.post( + f"{API_SERVER_URL}/manage/admin/user-group", + json=user_group_creation_request.model_dump(), + ) + response.raise_for_status() + return cast(int, response.json()["id"]) + + @staticmethod + def fetch_user_groups() -> list[UserGroup]: + response = requests.get(f"{API_SERVER_URL}/manage/admin/user-group") + response.raise_for_status() + return [UserGroup(**ug) for ug in response.json()] diff --git a/backend/tests/integration/common_utils/vespa.py b/backend/tests/integration/common_utils/vespa.py new file mode 100644 index 00000000000..aff7ef5eca6 --- /dev/null +++ b/backend/tests/integration/common_utils/vespa.py @@ -0,0 +1,27 @@ +import requests + +from danswer.document_index.vespa.index import DOCUMENT_ID_ENDPOINT + + +class TestVespaClient: + def __init__(self, index_name: str): + self.index_name = index_name + self.vespa_document_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) + + def get_documents_by_id( + self, document_ids: list[str], wanted_doc_count: int = 1_000 + ) -> dict: + selection = " or ".join( + f"{self.index_name}.document_id=='{document_id}'" + for document_id in document_ids + ) + params = { + "selection": selection, + "wantedDocumentCount": wanted_doc_count, + } + response = requests.get( + self.vespa_document_url, + params=params, # type: ignore + ) + response.raise_for_status() + return 
response.json() diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py new file mode 100644 index 00000000000..6c46e9f875e --- /dev/null +++ b/backend/tests/integration/conftest.py @@ -0,0 +1,26 @@ +from collections.abc import Generator + +import pytest +from sqlalchemy.orm import Session + +from danswer.db.engine import get_session_context_manager +from danswer.db.search_settings import get_current_search_settings +from tests.integration.common_utils.reset import reset_all +from tests.integration.common_utils.vespa import TestVespaClient + + +@pytest.fixture +def db_session() -> Generator[Session, None, None]: + with get_session_context_manager() as session: + yield session + + +@pytest.fixture +def vespa_client(db_session: Session) -> TestVespaClient: + search_settings = get_current_search_settings(db_session) + return TestVespaClient(index_name=search_settings.index_name) + + +@pytest.fixture +def reset() -> None: + reset_all() diff --git a/backend/tests/integration/tests/connector/test_deletion.py b/backend/tests/integration/tests/connector/test_deletion.py new file mode 100644 index 00000000000..78ad2378af9 --- /dev/null +++ b/backend/tests/integration/tests/connector/test_deletion.py @@ -0,0 +1,190 @@ +import time + +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.server.features.document_set.models import DocumentSetCreationRequest +from tests.integration.common_utils.connectors import ConnectorClient +from tests.integration.common_utils.constants import MAX_DELAY +from tests.integration.common_utils.document_sets import DocumentSetClient +from tests.integration.common_utils.seed_documents import TestDocumentClient +from tests.integration.common_utils.user_groups import UserGroupClient +from tests.integration.common_utils.user_groups import UserGroupCreate +from tests.integration.common_utils.vespa import TestVespaClient + + +def test_connector_deletion(reset: None, vespa_client: TestVespaClient) -> None: + # create connectors + c1_details = ConnectorClient.create_connector(name_prefix="tc1") + c2_details = ConnectorClient.create_connector(name_prefix="tc2") + c1_seed_res = TestDocumentClient.seed_documents( + num_docs=5, cc_pair_id=c1_details.cc_pair_id + ) + c2_seed_res = TestDocumentClient.seed_documents( + num_docs=5, cc_pair_id=c2_details.cc_pair_id + ) + + # create document sets + doc_set_1_id = DocumentSetClient.create_document_set( + DocumentSetCreationRequest( + name="Test Document Set 1", + description="Initially the connector to be deleted, should be empty after test", + cc_pair_ids=[c1_details.cc_pair_id], + is_public=True, + users=[], + groups=[], + ) + ) + + doc_set_2_id = DocumentSetClient.create_document_set( + DocumentSetCreationRequest( + name="Test Document Set 2", + description="Initially both connectors, should contain the undeleted connector after test", + cc_pair_ids=[c1_details.cc_pair_id, c2_details.cc_pair_id], + is_public=True, + users=[], + groups=[], + ) + ) + + # wait for document sets to be synced + start = time.time() + while True: + doc_sets = DocumentSetClient.fetch_document_sets() + doc_set_1 = next( + (doc_set for doc_set in doc_sets if doc_set.id == doc_set_1_id), None + ) + doc_set_2 = next( + (doc_set for doc_set in doc_sets if doc_set.id == doc_set_2_id), None + ) + + if not doc_set_1 or not doc_set_2: + raise RuntimeError("Document set not found") + + if doc_set_1.is_up_to_date and doc_set_2.is_up_to_date: + break + + if time.time() - start > MAX_DELAY: + raise TimeoutError("Document sets
were not synced within the max delay") + + time.sleep(2) + + print("Document sets created and synced") + + # if so, create ACLs + user_group_1 = UserGroupClient.create_user_group( + UserGroupCreate( + name="Test User Group 1", user_ids=[], cc_pair_ids=[c1_details.cc_pair_id] + ) + ) + user_group_2 = UserGroupClient.create_user_group( + UserGroupCreate( + name="Test User Group 2", + user_ids=[], + cc_pair_ids=[c1_details.cc_pair_id, c2_details.cc_pair_id], + ) + ) + + # wait for user groups to be available + start = time.time() + while True: + user_groups = {ug.id: ug for ug in UserGroupClient.fetch_user_groups()} + + if not ( + user_group_1 in user_groups.keys() and user_group_2 in user_groups.keys() + ): + raise RuntimeError("User groups not found") + + if ( + user_groups[user_group_1].is_up_to_date + and user_groups[user_group_2].is_up_to_date + ): + break + + if time.time() - start > MAX_DELAY: + raise TimeoutError("User groups were not synced within the max delay") + + time.sleep(2) + + print("User groups created and synced") + + # delete connector 1 + ConnectorClient.update_connector_status( + cc_pair_id=c1_details.cc_pair_id, status=ConnectorCredentialPairStatus.PAUSED + ) + ConnectorClient.delete_connector( + connector_id=c1_details.connector_id, credential_id=c1_details.credential_id + ) + + start = time.time() + while True: + connectors = ConnectorClient.get_connectors() + + if c1_details.connector_id not in [c["id"] for c in connectors]: + break + + if time.time() - start > MAX_DELAY: + raise TimeoutError("Connector 1 was not deleted within the max delay") + + time.sleep(2) + + print("Connector 1 deleted") + + # validate vespa documents + c1_vespa_docs = vespa_client.get_documents_by_id( + [doc.id for doc in c1_seed_res.documents] + )["documents"] + c2_vespa_docs = vespa_client.get_documents_by_id( + [doc.id for doc in c2_seed_res.documents] + )["documents"] + + assert len(c1_vespa_docs) == 0 + assert len(c2_vespa_docs) == 5 + + for doc in c2_vespa_docs: + assert doc["fields"]["access_control_list"] == { + "PUBLIC": 1, + "group:Test User Group 2": 1, + } + assert doc["fields"]["document_sets"] == {"Test Document Set 2": 1} + + # check that only connector 1 is deleted + # TODO: check for the CC pair rather than the connector once the refactor is done + all_connectors = ConnectorClient.get_connectors() + assert len(all_connectors) == 1 + assert all_connectors[0]["id"] == c2_details.connector_id + + # validate document sets + all_doc_sets = DocumentSetClient.fetch_document_sets() + assert len(all_doc_sets) == 2 + + doc_set_1_found = False + doc_set_2_found = False + for doc_set in all_doc_sets: + if doc_set.id == doc_set_1_id: + doc_set_1_found = True + assert doc_set.cc_pair_descriptors == [] + + if doc_set.id == doc_set_2_id: + doc_set_2_found = True + assert len(doc_set.cc_pair_descriptors) == 1 + assert doc_set.cc_pair_descriptors[0].id == c2_details.cc_pair_id + + assert doc_set_1_found + assert doc_set_2_found + + # validate user groups + all_user_groups = UserGroupClient.fetch_user_groups() + assert len(all_user_groups) == 2 + + user_group_1_found = False + user_group_2_found = False + for user_group in all_user_groups: + if user_group.id == user_group_1: + user_group_1_found = True + assert user_group.cc_pairs == [] + if user_group.id == user_group_2: + user_group_2_found = True + assert len(user_group.cc_pairs) == 1 + assert user_group.cc_pairs[0].id == c2_details.cc_pair_id + + assert user_group_1_found + assert user_group_2_found diff --git 
a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py new file mode 100644 index 00000000000..b00c2e3d1e6 --- /dev/null +++ b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py @@ -0,0 +1,36 @@ +import requests + +from tests.integration.common_utils.connectors import ConnectorClient +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.seed_documents import TestDocumentClient + + +def test_send_message_simple_with_history(reset: None) -> None: + # create connectors + c1_details = ConnectorClient.create_connector(name_prefix="tc1") + c1_seed_res = TestDocumentClient.seed_documents( + num_docs=5, cc_pair_id=c1_details.cc_pair_id + ) + + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [{"message": c1_seed_res.documents[0].content, "role": "user"}], + "persona_id": 0, + "prompt_id": 0, + }, + ) + assert response.status_code == 200 + + response_json = response.json() + + # Check that the top document is the correct document + assert response_json["simple_search_docs"][0]["id"] == c1_seed_res.documents[0].id + + # assert that the metadata is correct + for doc in c1_seed_res.documents: + found_doc = next( + (x for x in response_json["simple_search_docs"] if x["id"] == doc.id), None + ) + assert found_doc + assert found_doc["metadata"]["document_id"] == doc.id diff --git a/backend/tests/integration/tests/document_set/test_syncing.py b/backend/tests/integration/tests/document_set/test_syncing.py new file mode 100644 index 00000000000..9a6b42ab5df --- /dev/null +++ b/backend/tests/integration/tests/document_set/test_syncing.py @@ -0,0 +1,78 @@ +import time + +from danswer.server.features.document_set.models import DocumentSetCreationRequest +from tests.integration.common_utils.document_sets import DocumentSetClient +from tests.integration.common_utils.seed_documents import TestDocumentClient +from tests.integration.common_utils.vespa import TestVespaClient + + +def test_multiple_document_sets_syncing_same_connnector( + reset: None, vespa_client: TestVespaClient +) -> None: + # Seed documents + seed_result = TestDocumentClient.seed_documents(num_docs=5) + cc_pair_id = seed_result.cc_pair_id + + # Create first document set + doc_set_1_id = DocumentSetClient.create_document_set( + DocumentSetCreationRequest( + name="Test Document Set 1", + description="First test document set", + cc_pair_ids=[cc_pair_id], + is_public=True, + users=[], + groups=[], + ) + ) + + doc_set_2_id = DocumentSetClient.create_document_set( + DocumentSetCreationRequest( + name="Test Document Set 2", + description="Second test document set", + cc_pair_ids=[cc_pair_id], + is_public=True, + users=[], + groups=[], + ) + ) + + # wait for syncing to be complete + max_delay = 45 + start = time.time() + while True: + doc_sets = DocumentSetClient.fetch_document_sets() + doc_set_1 = next( + (doc_set for doc_set in doc_sets if doc_set.id == doc_set_1_id), None + ) + doc_set_2 = next( + (doc_set for doc_set in doc_sets if doc_set.id == doc_set_2_id), None + ) + + if not doc_set_1 or not doc_set_2: + raise RuntimeError("Document set not found") + + if doc_set_1.is_up_to_date and doc_set_2.is_up_to_date: + assert [ccp.id for ccp in doc_set_1.cc_pair_descriptors] == [ + ccp.id for ccp in doc_set_2.cc_pair_descriptors + ] + break + + if time.time() - start > max_delay: + raise TimeoutError("Document sets were not synced within the max delay") 
+ + time.sleep(2) + + # get names so we can compare to what is in vespa + doc_sets = DocumentSetClient.fetch_document_sets() + doc_set_names = {doc_set.name for doc_set in doc_sets} + + # make sure documents are as expected + seeded_document_ids = [doc.id for doc in seed_result.documents] + + result = vespa_client.get_documents_by_id([doc.id for doc in seed_result.documents]) + documents = result["documents"] + assert len(documents) == len(seed_result.documents) + assert all(doc["fields"]["document_id"] in seeded_document_ids for doc in documents) + assert all( + set(doc["fields"]["document_sets"].keys()) == doc_set_names for doc in documents + ) diff --git a/backend/tests/regression/answer_quality/README.md b/backend/tests/regression/answer_quality/README.md index 4610a9abc2e..27a0bd5ae96 100644 --- a/backend/tests/regression/answer_quality/README.md +++ b/backend/tests/regression/answer_quality/README.md @@ -9,66 +9,98 @@ This Python script automates the process of running search quality tests for a b - Manages environment variables - Switches to specified Git branch - Uploads test documents -- Runs search quality tests using Relari +- Runs search quality tests - Cleans up Docker containers (optional) ## Usage 1. Ensure you have the required dependencies installed. 2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file. -3. Configure the `.env_eval` file with the correct environment variables. -4. Navigate to the answer_quality folder: +3. Configure the `.env_eval` file in `deployment/docker_compose` with the correct environment variables. +4. Set up the PYTHONPATH permanently: + Add the following line to your shell configuration file (e.g., `~/.bashrc`, `~/.zshrc`, or `~/.bash_profile`): + ``` + export PYTHONPATH=$PYTHONPATH:/path/to/danswer/backend + ``` + Replace `/path/to/danswer` with the actual path to your Danswer repository. + After adding this line, restart your terminal or run `source ~/.bashrc` (or the appropriate config file) to apply the changes. +5. Navigate to the Danswer repo: ``` -cd danswer/backend/tests/regression/answer_quality +cd path/to/danswer ``` -4. Run the script: +6. Navigate to the answer_quality folder: ``` -python search_quality_test.py +cd backend/tests/regression/answer_quality ``` +7. To launch the evaluation environment, run the launch_eval_env.py script (this step can be skipped if you are running the environment outside of Docker; just leave "environment_name" blank): +``` +python launch_eval_env.py +``` +8. Run the file_uploader.py script to upload the zip files located at the path "zipped_documents_file": +``` +python file_uploader.py +``` +9. Run the run_qa.py script to ask questions from the jsonl located at the path "questions_file". This will hit the "query/answer-with-quote" API endpoint. +``` +python run_qa.py +``` + +Note: All data will be saved even after the containers are shut down. There are instructions below on re-launching docker containers using this data. + +If you decide to run multiple UIs at the same time, the ports will increment upwards from 3000 (e.g. http://localhost:3001). + +To see which port the desired instance is on, look at the ports on the nginx container by running `docker ps` or using Docker Desktop. + +The Docker daemon must be running for this to work.
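For orientation, a minimal `search_test_config.yaml` might look like the sketch below. The field names are taken from the Configuration section that follows, and the values are illustrative assumptions only; defer to `search_test_config.yaml.template` for the authoritative structure.
```
# illustrative sketch only -- copy search_test_config.yaml.template and adjust
output_folder: "~/danswer_test_results"   # per-test folders with postgres/vespa data and results
zipped_documents_file: "~/test_docs.zip"  # zip of documents to index
questions_file: "~/test_questions.jsonl"  # questions to ask against the index
commit_sha: null                          # null = test the code as-is
clean_up_docker_containers: true
launch_web_ui: false
only_state: false                         # true = only run Vespa and Postgres
only_retrieve_docs: true                  # skip LLM answer generation to save on API costs
use_cloud_gpu: false
model_server_ip: ""                       # only needed if use_cloud_gpu is true
model_server_port: ""                     # only needed if use_cloud_gpu is true
environment_name: ""                      # empty = launch a fresh environment
limit: null                               # null = no limit on the number of questions
llm: {}                                   # fill out according to the normal LLM seeding
```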
## Configuration Edit `search_test_config.yaml` to set: - output_folder - This is the folder where the folders for each test will go - These folders will contain the postgres/vespa data as well as the results for each test + - This is the folder where the folders for each test will go + - These folders will contain the postgres/vespa data as well as the results for each test - zipped_documents_file - The path to the zip file containing the files you'd like to test against + - The path to the zip file containing the files you'd like to test against - questions_file - The path to the yaml containing the questions you'd like to test with -- branch - Set the branch to null if you want it to just use the code as is + - The path to the yaml containing the questions you'd like to test with +- commit_sha + - Set this to the SHA of the commit you want to run the test against + - You must clear all local changes if you want to use this option + - Set this to null if you want it to just use the code as is - clean_up_docker_containers - Set this to true to automatically delete all docker containers, networks and volumes after the test + - Set this to true to automatically delete all docker containers, networks and volumes after the test - launch_web_ui - Set this to true if you want to use the UI during/after the testing process + - Set this to true if you want to use the UI during/after the testing process +- only_state + - Whether to only run Vespa and Postgres +- only_retrieve_docs + - Set true to only retrieve documents, not LLM response + - This is to save on API costs - use_cloud_gpu - Set to true or false depending on if you want to use the remote gpu - Only need to set this if use_cloud_gpu is true + - Set to true or false depending on if you want to use the remote gpu + - Only need to set this if use_cloud_gpu is true - model_server_ip - This is the ip of the remote model server - Only need to set this if use_cloud_gpu is true + - This is the ip of the remote model server + - Only need to set this if use_cloud_gpu is true - model_server_port - This is the port of the remote model server - Only need to set this if use_cloud_gpu is true -- existing_test_suffix - Use this if you would like to relaunch a previous test instance - Input the suffix of the test you'd like to re-launch - (E.g. to use the data from folder "test_1234_5678" put "_1234_5678") - No new files will automatically be uploaded - Leave empty to run a new test + - This is the port of the remote model server + - Only need to set this if use_cloud_gpu is true +- environment_name + - Use this if you would like to relaunch a previous test instance + - Input the env_name of the test you'd like to re-launch + - Leave empty to launch referencing local default network locations - limit - Max number of questions you'd like to ask against the dataset - Set to null for no limit + - Max number of questions you'd like to ask against the dataset + - Set to null for no limit - llm - Fill this out according to the normal LLM seeding - + - Fill this out according to the normal LLM seeding -To restart the evaluation using a particular index, set the suffix and turn off clean_up_docker_containers. -This also will skip running the evaluation questions, in this case, the relari.py script can be run manually. +## Relaunching From Existing Data -Docker daemon must be running for this to work. +To launch an existing set of containers that has already completed indexing, set the environment_name variable. 
This will launch the docker containers mounted on the volumes of the indicated env_name and will not automatically index any documents or run any QA. -Each script is able to be individually run to upload additional docs or run additional tests \ No newline at end of file +Once these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above). +- file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path. +- run_qa.py will ask questions located at the questions_file path against the indexed documents. diff --git a/backend/tests/regression/answer_quality/__init__.py b/backend/tests/regression/answer_quality/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/regression/answer_quality/api_utils.py b/backend/tests/regression/answer_quality/api_utils.py index 20292f35b63..5a46032c62f 100644 --- a/backend/tests/regression/answer_quality/api_utils.py +++ b/backend/tests/regression/answer_quality/api_utils.py @@ -11,17 +11,22 @@ from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails from danswer.server.documents.models import ConnectorBase -from tests.regression.answer_quality.cli_utils import ( - get_api_server_host_port, -) +from tests.regression.answer_quality.cli_utils import get_api_server_host_port +GENERAL_HEADERS = {"Content-Type": "application/json"} -def _api_url_builder(run_suffix: str, api_path: str) -> str: - return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path + +def _api_url_builder(env_name: str, api_path: str) -> str: + if env_name: + return f"http://localhost:{get_api_server_host_port(env_name)}" + api_path + else: + return "http://localhost:8080" + api_path -@retry(tries=5, delay=2, backoff=2) -def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]: +@retry(tries=5, delay=5) +def get_answer_from_query( + query: str, only_retrieve_docs: bool, env_name: str +) -> tuple[list[str], str]: filters = IndexFilters( source_type=None, document_set=None, @@ -44,41 +49,38 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]: ), chain_of_thought=False, return_contexts=True, + skip_gen_ai_answer_generation=only_retrieve_docs, ) - url = _api_url_builder(run_suffix, "/query/answer-with-quote/") + url = _api_url_builder(env_name, "/query/answer-with-quote/") headers = { "Content-Type": "application/json", } - body = new_message_request.dict() + body = new_message_request.model_dump() body["user"] = None try: response_json = requests.post(url, headers=headers, json=body).json() - content_list = [ - context.get("content", "") - for context in response_json.get("contexts", {}).get("contexts", []) - ] - answer = response_json.get("answer") + context_data_list = response_json.get("contexts", {}).get("contexts", []) + answer = response_json.get("answer", "") or "" except Exception as e: - print("Failed to answer the questions, trying again") - print(f"error: {str(e)}") + print("Failed to answer the questions:") + print(f"\t {str(e)}") raise e - print("\nquery: ", query) - print("answer: ", answer) - print("content_list: ", content_list) - - return content_list, answer - + return context_data_list, answer -def check_if_query_ready(run_suffix: str) -> bool: - url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/") - headers = { - "Content-Type": "application/json", - } - indexing_status_dict = 
requests.get(url, headers=headers).json() +@retry(tries=10, delay=10) +def check_indexing_status(env_name: str) -> tuple[int, bool]: + url = _api_url_builder(env_name, "/manage/admin/connector/indexing-status/") + try: + indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json() + except Exception as e: + print("Failed to check indexing status, API server is likely starting up:") + print(f"\t {str(e)}") + print("trying again") + raise e ongoing_index_attempts = False doc_count = 0 @@ -86,31 +88,28 @@ status = index_attempt["last_status"] if status == IndexingStatus.IN_PROGRESS or status == IndexingStatus.NOT_STARTED: ongoing_index_attempts = True + elif status == IndexingStatus.SUCCESS: + doc_count += 16 doc_count += index_attempt["docs_indexed"] + doc_count -= 16 - if not doc_count: - print("No docs indexed, waiting for indexing to start") - elif ongoing_index_attempts: - print( - f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..." - ) - - return doc_count > 0 and not ongoing_index_attempts - + # all the +16 and -16 are to account for the fact that the indexing status + # is only updated every 16 documents and will tell us how many are + # chunked, not indexed. Probably need to fix this in the future! + if doc_count: + doc_count += 16 + return doc_count, ongoing_index_attempts -def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None: - url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/") - headers = { - "Content-Type": "application/json", - } +def run_cc_once(env_name: str, connector_id: int, credential_id: int) -> None: + url = _api_url_builder(env_name, "/manage/admin/connector/run-once/") body = { "connector_id": connector_id, "credential_ids": [credential_id], "from_beginning": True, } print("body:", body) - response = requests.post(url, headers=headers, json=body) + response = requests.post(url, headers=GENERAL_HEADERS, json=body) if response.status_code == 200: print("Connector created successfully:", response.json()) else: @@ -118,17 +117,14 @@ print("Failed text:", response.text) -def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None: +def create_cc_pair(env_name: str, connector_id: int, credential_id: int) -> None: url = _api_url_builder( - run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}" + env_name, f"/manage/connector/{connector_id}/credential/{credential_id}" ) - headers = { - "Content-Type": "application/json", - } - body = {"name": "zip_folder_contents", "is_public": True} + body = {"name": "zip_folder_contents", "is_public": True, "groups": []} print("body:", body) - response = requests.put(url, headers=headers, json=body) + response = requests.put(url, headers=GENERAL_HEADERS, json=body) if response.status_code == 200: print("Connector created successfully:", response.json()) else: @@ -136,16 +132,14 @@ print("Failed text:", response.text) -def _get_existing_connector_names(run_suffix: str) -> list[str]: - url = _api_url_builder(run_suffix, "/manage/connector") - headers = { - "Content-Type": "application/json", - } +def _get_existing_connector_names(env_name: str) -> list[str]: + url = _api_url_builder(env_name, "/manage/connector") + body = { "credential_json": {}, "admin_public": True, } - response = requests.get(url,
headers=headers, json=body) + response = requests.get(url, headers=GENERAL_HEADERS, json=body) if response.status_code == 200: connectors = response.json() return [connector["name"] for connector in connectors] @@ -153,13 +147,10 @@ def _get_existing_connector_names(run_suffix: str) -> list[str]: raise RuntimeError(response.__dict__) -def create_connector(run_suffix: str, file_paths: list[str]) -> int: - url = _api_url_builder(run_suffix, "/manage/admin/connector") - headers = { - "Content-Type": "application/json", - } +def create_connector(env_name: str, file_paths: list[str]) -> int: + url = _api_url_builder(env_name, "/manage/admin/connector") connector_name = base_connector_name = "search_eval_connector" - existing_connector_names = _get_existing_connector_names(run_suffix) + existing_connector_names = _get_existing_connector_names(env_name) count = 1 while connector_name in existing_connector_names: @@ -173,29 +164,25 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int: connector_specific_config={"file_locations": file_paths}, refresh_freq=None, prune_freq=None, - disabled=False, + indexing_start=None, ) - body = connector.dict() - print("body:", body) - response = requests.post(url, headers=headers, json=body) + body = connector.model_dump() + response = requests.post(url, headers=GENERAL_HEADERS, json=body) if response.status_code == 200: - print("Connector created successfully:", response.json()) return response.json()["id"] else: raise RuntimeError(response.__dict__) -def create_credential(run_suffix: str) -> int: - url = _api_url_builder(run_suffix, "/manage/credential") - headers = { - "Content-Type": "application/json", - } +def create_credential(env_name: str) -> int: + url = _api_url_builder(env_name, "/manage/credential") body = { "credential_json": {}, "admin_public": True, + "source": DocumentSource.FILE, } - response = requests.post(url, headers=headers, json=body) + response = requests.post(url, headers=GENERAL_HEADERS, json=body) if response.status_code == 200: print("credential created successfully:", response.json()) return response.json()["id"] @@ -204,12 +191,12 @@ def create_credential(run_suffix: str) -> int: @retry(tries=10, delay=2, backoff=2) -def upload_file(run_suffix: str, zip_file_path: str) -> list[str]: +def upload_file(env_name: str, zip_file_path: str) -> list[str]: files = [ ("files", open(zip_file_path, "rb")), ] - api_path = _api_url_builder(run_suffix, "/manage/admin/connector/file/upload") + api_path = _api_url_builder(env_name, "/manage/admin/connector/file/upload") try: response = requests.post(api_path, files=files) response.raise_for_status() # Raises an HTTPError for bad responses diff --git a/backend/tests/regression/answer_quality/cli_utils.py b/backend/tests/regression/answer_quality/cli_utils.py index a39309efd58..874a6292dbc 100644 --- a/backend/tests/regression/answer_quality/cli_utils.py +++ b/backend/tests/regression/answer_quality/cli_utils.py @@ -1,10 +1,14 @@ import json import os +import socket import subprocess import sys +import time +from datetime import datetime from threading import Thread from typing import IO +import yaml from retry import retry @@ -56,17 +60,44 @@ def get_current_commit_sha() -> str: return sha -def switch_to_branch(branch: str) -> None: - print(f"Switching to branch: {branch}...") - _run_command(f"git checkout {branch}") - _run_command("git pull") - print(f"Successfully switched to branch: {branch}") +def switch_to_commit(commit_sha: str) -> None: + print(f"Switching to commit: 
{commit_sha}...") + _run_command(f"git checkout {commit_sha}") + print(f"Successfully switched to commit: {commit_sha}") print("Repository updated successfully.") -def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> str: +def get_docker_container_env_vars(env_name: str) -> dict: + """ + Retrieves environment variables from "background" and "api_server" Docker containers. + """ + print(f"Getting environment variables for containers with env_name: {env_name}") + + combined_env_vars = {} + for container_type in ["background", "api_server"]: + container_name = _run_command( + f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{env_name}/'" + )[0].strip() + if not container_name: + raise RuntimeError( + f"No {container_type} container found with env_name: {env_name}" + ) + + env_vars_json = _run_command( + f"docker inspect --format='{{{{json .Config.Env}}}}' {container_name}" + )[0] + env_vars_list = json.loads(env_vars_json.strip()) + + for env_var in env_vars_list: + key, value = env_var.split("=", 1) + combined_env_vars[key] = value + + return combined_env_vars + + +def manage_data_directories(env_name: str, base_path: str, use_cloud_gpu: bool) -> None: # Use the user's home directory as the base path - target_path = os.path.join(os.path.expanduser(base_path), f"test{suffix}") + target_path = os.path.join(os.path.expanduser(base_path), env_name) directories = { "DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"), "DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"), @@ -84,9 +115,8 @@ def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> os.makedirs(directory, exist_ok=True) os.environ[env_var] = directory print(f"Set {env_var} to: {directory}") - relari_output_path = os.path.join(target_path, "relari_output/") - os.makedirs(relari_output_path, exist_ok=True) - return relari_output_path + results_output_path = os.path.join(target_path, "evaluations_output/") + os.makedirs(results_output_path, exist_ok=True) def set_env_variables( @@ -108,20 +138,36 @@ def set_env_variables( print(f"Set {env_var_name} to: {env_var}") +def _is_port_in_use(port: int) -> bool: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(("localhost", port)) == 0 + + def start_docker_compose( - run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool + env_name: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False ) -> None: print("Starting Docker Compose...") - os.chdir("../deployment/docker_compose") - command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack{run_suffix} up -d" + os.chdir(os.path.dirname(__file__)) + os.chdir("../../../../deployment/docker_compose/") + command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack-{env_name} up -d" command += " --build" command += " --force-recreate" - if not launch_web_ui: - command += " --scale web_server=0" - command += " --scale nginx=0" - if use_cloud_gpu: - command += " --scale indexing_model_server=0" - command += " --scale inference_model_server=0" + + if only_state: + command += " index relational_db" + else: + if use_cloud_gpu: + command += " --scale indexing_model_server=0" + command += " --scale inference_model_server=0" + if launch_web_ui: + web_ui_port = 3000 + while _is_port_in_use(web_ui_port): + web_ui_port += 1 + print(f"UI will be launched at http://localhost:{web_ui_port}") + os.environ["NGINX_PORT"] = str(web_ui_port) + else: + command += " --scale 
web_server=0" + command += " --scale nginx=0" print("Docker Command:\n", command) @@ -129,16 +175,17 @@ def start_docker_compose( print("Containers have been launched") -def cleanup_docker(run_suffix: str) -> None: +def cleanup_docker(env_name: str) -> None: print( - f"Deleting Docker containers, volumes, and networks for project suffix: {run_suffix}" + f"Deleting Docker containers, volumes, and networks for project env_name: {env_name}" ) stdout, _ = _run_command("docker ps -a --format '{{json .}}'") containers = [json.loads(line) for line in stdout.splitlines()] - - project_name = f"danswer-stack{run_suffix}" + if not env_name: + env_name = datetime.now().strftime("-%Y") + project_name = f"danswer-stack{env_name}" containers_to_delete = [ c for c in containers if c["Names"].startswith(project_name) ] @@ -174,23 +221,23 @@ def cleanup_docker(run_suffix: str) -> None: networks = stdout.splitlines() - networks_to_delete = [n for n in networks if run_suffix in n] + networks_to_delete = [n for n in networks if env_name in n] if not networks_to_delete: - print(f"No networks found containing suffix: {run_suffix}") + print(f"No networks found containing env_name: {env_name}") else: network_names = " ".join(networks_to_delete) _run_command(f"docker network rm {network_names}") print( - f"Successfully deleted {len(networks_to_delete)} networks containing suffix: {run_suffix}" + f"Successfully deleted {len(networks_to_delete)} networks containing env_name: {env_name}" ) @retry(tries=5, delay=5, backoff=2) -def get_api_server_host_port(suffix: str) -> str: +def get_api_server_host_port(env_name: str) -> str: """ - This pulls all containers with the provided suffix + This pulls all containers with the provided env_name It then grabs the JSON specific container with a name containing "api_server" It then grabs the port info from the JSON and strips out the relevent data """ @@ -201,16 +248,16 @@ def get_api_server_host_port(suffix: str) -> str: server_jsons = [] for container in containers: - if container_name in container["Names"] and suffix in container["Names"]: + if container_name in container["Names"] and env_name in container["Names"]: server_jsons.append(container) if not server_jsons: raise RuntimeError( - f"No container found containing: {container_name} and {suffix}" + f"No container found containing: {container_name} and {env_name}" ) elif len(server_jsons) > 1: raise RuntimeError( - f"Too many containers matching {container_name} found, please indicate a suffix" + f"Too many containers matching {container_name} found, please indicate a env_name" ) server_json = server_jsons[0] @@ -231,6 +278,44 @@ def get_api_server_host_port(suffix: str) -> str: raise RuntimeError(f"Too many ports matching {client_port} found") if not matching_ports: raise RuntimeError( - f"No port found containing: {client_port} for container: {container_name} and suffix: {suffix}" + f"No port found containing: {client_port} for container: {container_name} and env_name: {env_name}" ) return matching_ports[0] + + +# Added function to restart Vespa container +def restart_vespa_container(env_name: str) -> None: + print(f"Restarting Vespa container for env_name: {env_name}") + + # Find the Vespa container + stdout, _ = _run_command( + f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{env_name}/'" + ) + container_name = stdout.strip() + + if not container_name: + raise RuntimeError(f"No Vespa container found with env_name: {env_name}") + + # Restart the container + _run_command(f"docker restart {container_name}") 
+ + print(f"Vespa container '{container_name}' has begun restarting") + + time.sleep(30) + print(f"Vespa container '{container_name}' has been restarted") + + +if __name__ == "__main__": + """ + Running this just cleans up the docker environment for the container indicated by environment_name + If no environment_name is indicated, will just clean up all danswer docker containers/volumes/networks + Note: vespa/postgres mounts are not deleted + """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(current_dir, "search_test_config.yaml") + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + if not isinstance(config, dict): + raise TypeError("config must be a dictionary") + cleanup_docker(config["environment_name"]) diff --git a/backend/tests/regression/answer_quality/eval_direct_qa.py b/backend/tests/regression/answer_quality/eval_direct_qa.py deleted file mode 100644 index 1a8ca7c4526..00000000000 --- a/backend/tests/regression/answer_quality/eval_direct_qa.py +++ /dev/null @@ -1,252 +0,0 @@ -import argparse -import builtins -from contextlib import contextmanager -from datetime import datetime -from typing import Any -from typing import TextIO - -import yaml -from sqlalchemy.orm import Session - -from danswer.chat.models import LLMMetricsContainer -from danswer.configs.constants import MessageType -from danswer.db.engine import get_sqlalchemy_engine -from danswer.one_shot_answer.answer_question import get_search_answer -from danswer.one_shot_answer.models import DirectQARequest -from danswer.one_shot_answer.models import ThreadMessage -from danswer.search.models import IndexFilters -from danswer.search.models import OptionalSearchSetting -from danswer.search.models import RerankMetricsContainer -from danswer.search.models import RetrievalDetails -from danswer.search.models import RetrievalMetricsContainer -from danswer.utils.callbacks import MetricsHander - - -engine = get_sqlalchemy_engine() - - -@contextmanager -def redirect_print_to_file(file: TextIO) -> Any: - original_print = builtins.print - builtins.print = lambda *args, **kwargs: original_print(*args, file=file, **kwargs) - try: - yield - finally: - builtins.print = original_print - - -def load_yaml(filepath: str) -> dict: - with open(filepath, "r") as file: - data = yaml.safe_load(file) - return data - - -def word_wrap(s: str, max_line_size: int = 100, prepend_tab: bool = True) -> str: - words = s.split() - - current_line: list[str] = [] - result_lines: list[str] = [] - current_length = 0 - for word in words: - if len(word) > max_line_size: - if current_line: - result_lines.append(" ".join(current_line)) - current_line = [] - current_length = 0 - - result_lines.append(word) - continue - - if current_length + len(word) + len(current_line) > max_line_size: - result_lines.append(" ".join(current_line)) - current_line = [] - current_length = 0 - - current_line.append(word) - current_length += len(word) - - if current_line: - result_lines.append(" ".join(current_line)) - - return "\t" + "\n\t".join(result_lines) if prepend_tab else "\n".join(result_lines) - - -def get_answer_for_question( - query: str, db_session: Session -) -> tuple[ - str | None, - RetrievalMetricsContainer | None, - RerankMetricsContainer | None, -]: - filters = IndexFilters( - source_type=None, - document_set=None, - time_cutoff=None, - tags=None, - access_control_list=None, - ) - - messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)] - - new_message_request = DirectQARequest( - 
messages=messages, - prompt_id=0, - persona_id=0, - retrieval_options=RetrievalDetails( - run_search=OptionalSearchSetting.ALWAYS, - real_time=True, - filters=filters, - enable_auto_detect_filters=False, - ), - chain_of_thought=False, - ) - - retrieval_metrics = MetricsHander[RetrievalMetricsContainer]() - rerank_metrics = MetricsHander[RerankMetricsContainer]() - - answer = get_search_answer( - query_req=new_message_request, - user=None, - max_document_tokens=None, - max_history_tokens=None, - db_session=db_session, - answer_generation_timeout=100, - enable_reflexion=False, - bypass_acl=True, - retrieval_metrics_callback=retrieval_metrics.record_metric, - rerank_metrics_callback=rerank_metrics.record_metric, - ) - - return ( - answer.answer, - retrieval_metrics.metrics, - rerank_metrics.metrics, - ) - - -def _print_retrieval_metrics( - metrics_container: RetrievalMetricsContainer, show_all: bool -) -> None: - for ind, metric in enumerate(metrics_container.metrics): - if not show_all and ind >= 10: - break - - if ind != 0: - print() # for spacing purposes - print(f"\tDocument: {metric.document_id}") - print(f"\tLink: {metric.first_link or 'NA'}") - section_start = metric.chunk_content_start.replace("\n", " ") - print(f"\tSection Start: {section_start}") - print(f"\tSimilarity Distance Metric: {metric.score}") - - -def _print_reranking_metrics( - metrics_container: RerankMetricsContainer, show_all: bool -) -> None: - # Printing the raw scores as they're more informational than post-norm/boosting - for ind, metric in enumerate(metrics_container.metrics): - if not show_all and ind >= 10: - break - - if ind != 0: - print() # for spacing purposes - print(f"\tDocument: {metric.document_id}") - print(f"\tLink: {metric.first_link or 'NA'}") - section_start = metric.chunk_content_start.replace("\n", " ") - print(f"\tSection Start: {section_start}") - print(f"\tSimilarity Score: {metrics_container.raw_similarity_scores[ind]}") - - -def _print_llm_metrics(metrics_container: LLMMetricsContainer) -> None: - print(f"\tPrompt Tokens: {metrics_container.prompt_tokens}") - print(f"\tResponse Tokens: {metrics_container.response_tokens}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "regression_yaml", - type=str, - help="Path to the Questions YAML file.", - default="./tests/regression/answer_quality/sample_questions.yaml", - nargs="?", - ) - parser.add_argument( - "--real-time", action="store_true", help="Set to use the real-time flow." - ) - parser.add_argument( - "--discard-metrics", - action="store_true", - help="Set to not include metrics on search, rerank, and token counts.", - ) - parser.add_argument( - "--all-results", - action="store_true", - help="Set to not include more than the 10 top sections for search and reranking metrics.", - ) - parser.add_argument( - "--output", - type=str, - help="Path to the output results file.", - default="./tests/regression/answer_quality/regression_results.txt", - ) - args = parser.parse_args() - - questions_data = load_yaml(args.regression_yaml) - - with open(args.output, "w") as outfile: - with redirect_print_to_file(outfile): - print("Running Question Answering Flow") - print( - "Note that running metrics requires tokenizing all " - "prompts/returns and slightly slows down inference." - ) - print( - "Also note that the text embedding model (bi-encoder) currently used is trained for " - "relative distances, not absolute distances. 
Therefore cosine similarity values may all be > 0.5 " - "even for poor matches" - ) - - with Session(engine, expire_on_commit=False) as db_session: - for sample in questions_data["questions"]: - print( - f"Running Test for Question {sample['id']}: {sample['question']}" - ) - - start_time = datetime.now() - ( - answer, - retrieval_metrics, - rerank_metrics, - ) = get_answer_for_question(sample["question"], db_session) - end_time = datetime.now() - - print(f"====Duration: {end_time - start_time}====") - print(f"Question {sample['id']}:") - print(f'\t{sample["question"]}') - print("\nApproximate Expected Answer:") - print(f'\t{sample["expected_answer"]}') - print("\nActual Answer:") - print( - word_wrap(answer) - if answer - else "\tFailed, either crashed or refused to answer." - ) - if not args.discard_metrics: - print("\nRetrieval Metrics:") - if retrieval_metrics is None: - print("No Retrieval Metrics Available") - else: - _print_retrieval_metrics( - retrieval_metrics, show_all=args.all_results - ) - - print("\nReranking Metrics:") - if rerank_metrics is None: - print("No Reranking Metrics Available") - else: - _print_reranking_metrics( - rerank_metrics, show_all=args.all_results - ) - - print("\n\n", flush=True) diff --git a/backend/tests/regression/answer_quality/file_uploader.py b/backend/tests/regression/answer_quality/file_uploader.py index a80cadb2cac..8cbc632b5b7 100644 --- a/backend/tests/regression/answer_quality/file_uploader.py +++ b/backend/tests/regression/answer_quality/file_uploader.py @@ -1,8 +1,14 @@ +import csv import os +import tempfile +import time +import zipfile +from pathlib import Path from types import SimpleNamespace import yaml +from tests.regression.answer_quality.api_utils import check_indexing_status from tests.regression.answer_quality.api_utils import create_cc_pair from tests.regression.answer_quality.api_utils import create_connector from tests.regression.answer_quality.api_utils import create_credential @@ -10,15 +16,86 @@ from tests.regression.answer_quality.api_utils import upload_file -def upload_test_files(zip_file_path: str, run_suffix: str) -> None: +def unzip_and_get_file_paths(zip_file_path: str) -> list[str]: + persistent_dir = tempfile.mkdtemp() + with zipfile.ZipFile(zip_file_path, "r") as zip_ref: + zip_ref.extractall(persistent_dir) + + file_paths = [] + for root, _, files in os.walk(persistent_dir): + for file in sorted(files): + file_paths.append(os.path.join(root, file)) + + return file_paths + + +def create_temp_zip_from_files(file_paths: list[str]) -> str: + persistent_dir = tempfile.mkdtemp() + zip_file_path = os.path.join(persistent_dir, "temp.zip") + + with zipfile.ZipFile(zip_file_path, "w") as zip_file: + for file_path in file_paths: + zip_file.write(file_path, Path(file_path).name) + + return zip_file_path + + +def upload_test_files(zip_file_path: str, env_name: str) -> None: print("zip:", zip_file_path) - file_paths = upload_file(run_suffix, zip_file_path) + file_paths = upload_file(env_name, zip_file_path) + + conn_id = create_connector(env_name, file_paths) + cred_id = create_credential(env_name) + + create_cc_pair(env_name, conn_id, cred_id) + run_cc_once(env_name, conn_id, cred_id) + + +def manage_file_upload(zip_file_path: str, env_name: str) -> None: + start_time = time.time() + unzipped_file_paths = unzip_and_get_file_paths(zip_file_path) + total_file_count = len(unzipped_file_paths) + problem_file_list: list[str] = [] + + while True: + doc_count, ongoing_index_attempts = check_indexing_status(env_name) + + if 
ongoing_index_attempts: + print( + f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..." + ) + elif not doc_count: + print("No docs indexed, waiting for indexing to start") + temp_zip_file_path = create_temp_zip_from_files(unzipped_file_paths) + upload_test_files(temp_zip_file_path, env_name) + os.unlink(temp_zip_file_path) + elif (doc_count + len(problem_file_list)) < total_file_count: + print(f"No ongoing indexing attempts but only {doc_count} docs indexed") + remaining_files = unzipped_file_paths[doc_count + len(problem_file_list) :] + problem_file_list.append(remaining_files.pop(0)) + print( + f"Removing the first unindexed doc and retrying the remaining {len(remaining_files)} docs" + ) + temp_zip_file_path = create_temp_zip_from_files(remaining_files) + upload_test_files(temp_zip_file_path, env_name) + os.unlink(temp_zip_file_path) + else: + print(f"Successfully uploaded {doc_count} docs!") + break + + time.sleep(10) - conn_id = create_connector(run_suffix, file_paths) - cred_id = create_credential(run_suffix) + if problem_file_list: + problem_file_csv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "problem_files.csv") + with open(problem_file_csv_path, "w", newline="") as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(["Problematic File Paths"]) + for problem_file in problem_file_list: + csvwriter.writerow([problem_file]) - create_cc_pair(run_suffix, conn_id, cred_id) - run_cc_once(run_suffix, conn_id, cred_id) + for file in unzipped_file_paths: + os.unlink(file) + print(f"Total time taken: {(time.time() - start_time)/60} minutes") if __name__ == "__main__": @@ -27,5 +104,5 @@ def upload_test_files(zip_file_path: str, run_suffix: str) -> None: with open(config_path, "r") as file: config = SimpleNamespace(**yaml.safe_load(file)) file_location = config.zipped_documents_file - run_suffix = config.existing_test_suffix - upload_test_files(file_location, run_suffix) + env_name = config.environment_name + manage_file_upload(file_location, env_name) diff --git a/backend/tests/regression/answer_quality/launch_eval_env.py b/backend/tests/regression/answer_quality/launch_eval_env.py new file mode 100644 index 00000000000..e701a1d42cf --- /dev/null +++ b/backend/tests/regression/answer_quality/launch_eval_env.py @@ -0,0 +1,48 @@ +import os +from types import SimpleNamespace + +import yaml + +from tests.regression.answer_quality.cli_utils import manage_data_directories +from tests.regression.answer_quality.cli_utils import set_env_variables +from tests.regression.answer_quality.cli_utils import start_docker_compose +from tests.regression.answer_quality.cli_utils import switch_to_commit + + +def load_config(config_filename: str) -> SimpleNamespace: + current_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(current_dir, config_filename) + with open(config_path, "r") as file: + return SimpleNamespace(**yaml.safe_load(file)) + + +def main() -> None: + config = load_config("search_test_config.yaml") + if config.environment_name: + env_name = config.environment_name + print("launching danswer with environment name:", env_name) + else: + print("No env name defined. 
Not launching docker.") + print( + "Please define a name in the config yaml to start a new env " + "or use an existing env" + ) + return + + set_env_variables( + config.model_server_ip, + config.model_server_port, + config.use_cloud_gpu, + config.llm, + ) + manage_data_directories(env_name, config.output_folder, config.use_cloud_gpu) + if config.commit_sha: + switch_to_commit(config.commit_sha) + + start_docker_compose( + env_name, config.launch_web_ui, config.use_cloud_gpu, config.only_state + ) + + +if __name__ == "__main__": + main() diff --git a/backend/tests/regression/answer_quality/relari.py b/backend/tests/regression/answer_quality/relari.py deleted file mode 100644 index c669d55c253..00000000000 --- a/backend/tests/regression/answer_quality/relari.py +++ /dev/null @@ -1,119 +0,0 @@ -import json -import os -import time -from types import SimpleNamespace - -import yaml - -from tests.regression.answer_quality.api_utils import check_if_query_ready -from tests.regression.answer_quality.api_utils import get_answer_from_query -from tests.regression.answer_quality.cli_utils import get_current_commit_sha - - -def _get_and_write_relari_outputs( - samples: list[dict], run_suffix: str, output_file_path: str -) -> None: - while not check_if_query_ready(run_suffix): - time.sleep(5) - - with open(output_file_path, "w", encoding="utf-8") as file: - for sample in samples: - retrieved_context, answer = get_answer_from_query( - query=sample["question"], - run_suffix=run_suffix, - ) - - if not answer: - print("NO ANSWER GIVEN FOR QUESTION:", sample["question"]) - continue - - output = { - "label": sample["uid"], - "question": sample["question"], - "answer": answer, - "retrieved_context": retrieved_context, - } - - file.write(json.dumps(output) + "\n") - file.flush() - - -def _write_metadata_file(run_suffix: str, metadata_file_path: str) -> None: - metadata = {"commit_sha": get_current_commit_sha(), "run_suffix": run_suffix} - - print("saving metadata to:", metadata_file_path) - with open(metadata_file_path, "w", encoding="utf-8") as yaml_file: - yaml.dump(metadata, yaml_file) - - -def _read_questions_jsonl(questions_file_path: str) -> list[dict]: - questions = [] - with open(questions_file_path, "r") as file: - for line in file: - json_obj = json.loads(line) - questions.append(json_obj) - return questions - - -def answer_relari_questions( - questions_file_path: str, - results_folder_path: str, - run_suffix: str, - limit: int | None = None, -) -> None: - results_file = "run_results.jsonl" - metadata_file = "run_metadata.yaml" - samples = _read_questions_jsonl(questions_file_path) - - if limit is not None: - samples = samples[:limit] - - counter = 1 - output_file_path = os.path.join(results_folder_path, results_file) - metadata_file_path = os.path.join(results_folder_path, metadata_file) - while os.path.exists(output_file_path): - output_file_path = os.path.join( - results_folder_path, - results_file.replace("run_results", f"run_results_{counter}"), - ) - metadata_file_path = os.path.join( - results_folder_path, - metadata_file.replace("run_metadata", f"run_metadata_{counter}"), - ) - counter += 1 - - print("saving question results to:", output_file_path) - _write_metadata_file(run_suffix, metadata_file_path) - _get_and_write_relari_outputs( - samples=samples, run_suffix=run_suffix, output_file_path=output_file_path - ) - - -def main() -> None: - current_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(current_dir, "search_test_config.yaml") - with open(config_path, "r") as 
file: - config = SimpleNamespace(**yaml.safe_load(file)) - - current_output_folder = os.path.expanduser(config.output_folder) - if config.existing_test_suffix: - current_output_folder = os.path.join( - current_output_folder, "test" + config.existing_test_suffix, "relari_output" - ) - else: - current_output_folder = os.path.join(current_output_folder, "no_defined_suffix") - - answer_relari_questions( - config.questions_file, - current_output_folder, - config.existing_test_suffix, - config.limit, - ) - - -if __name__ == "__main__": - """ - To run a different set of questions, update the questions_file in search_test_config.yaml - If there is more than one instance of Danswer running, specify the suffix in search_test_config.yaml - """ - main() diff --git a/backend/tests/regression/answer_quality/run_qa.py b/backend/tests/regression/answer_quality/run_qa.py new file mode 100644 index 00000000000..5de034b3740 --- /dev/null +++ b/backend/tests/regression/answer_quality/run_qa.py @@ -0,0 +1,196 @@ +import json +import multiprocessing +import os +import shutil +import time + +import yaml + +from tests.regression.answer_quality.api_utils import get_answer_from_query +from tests.regression.answer_quality.cli_utils import get_current_commit_sha +from tests.regression.answer_quality.cli_utils import get_docker_container_env_vars + +RESULTS_FILENAME = "results.jsonl" +METADATA_FILENAME = "metadata.yaml" + + +def _populate_results_file(output_folder_path: str, all_qa_output: list[dict]) -> None: + output_file_path = os.path.join(output_folder_path, RESULTS_FILENAME) + with open(output_file_path, "a", encoding="utf-8") as file: + for qa_output in all_qa_output: + file.write(json.dumps(qa_output) + "\n") + file.flush() + + +def _update_metadata_file(test_output_folder: str, invalid_answer_count: int) -> None: + metadata_path = os.path.join(test_output_folder, METADATA_FILENAME) + with open(metadata_path, "r", encoding="utf-8") as file: + metadata = yaml.safe_load(file) + + metadata["number_of_failed_questions"] = invalid_answer_count + with open(metadata_path, "w", encoding="utf-8") as yaml_file: + yaml.dump(metadata, yaml_file) + + +def _read_questions_jsonl(questions_file_path: str) -> list[dict]: + questions = [] + with open(questions_file_path, "r") as file: + for line in file: + json_obj = json.loads(line) + questions.append(json_obj) + return questions + + +def _get_test_output_folder(config: dict) -> str: + base_output_folder = os.path.expanduser(config["output_folder"]) + if config["env_name"]: + base_output_folder = os.path.join( + base_output_folder, config["env_name"], "evaluations_output" + ) + else: + base_output_folder = os.path.join(base_output_folder, "no_defined_env_name") + + counter = 1 + output_folder_path = os.path.join(base_output_folder, "run_1") + while os.path.exists(output_folder_path): + output_folder_path = os.path.join( + output_folder_path.replace(f"run_{counter-1}", f"run_{counter}"), + ) + counter += 1 + + os.makedirs(output_folder_path, exist_ok=True) + + return output_folder_path + + +def _initialize_files(config: dict) -> tuple[str, list[dict]]: + test_output_folder = _get_test_output_folder(config) + + questions_file_path = config["questions_file"] + + questions = _read_questions_jsonl(questions_file_path) + + metadata = { + "commit_sha": get_current_commit_sha(), + "env_name": config["env_name"], + "test_config": config, + "number_of_questions_in_dataset": len(questions), + } + + env_vars = get_docker_container_env_vars(config["env_name"]) + if 
env_vars["ENV_SEED_CONFIGURATION"]: + del env_vars["ENV_SEED_CONFIGURATION"] + if env_vars["GPG_KEY"]: + del env_vars["GPG_KEY"] + if metadata["test_config"]["llm"]["api_key"]: + del metadata["test_config"]["llm"]["api_key"] + metadata.update(env_vars) + metadata_path = os.path.join(test_output_folder, METADATA_FILENAME) + print("saving metadata to:", metadata_path) + with open(metadata_path, "w", encoding="utf-8") as yaml_file: + yaml.dump(metadata, yaml_file) + + copied_questions_file_path = os.path.join( + test_output_folder, os.path.basename(questions_file_path) + ) + shutil.copy2(questions_file_path, copied_questions_file_path) + + zipped_files_path = config["zipped_documents_file"] + copied_zipped_documents_path = os.path.join( + test_output_folder, os.path.basename(zipped_files_path) + ) + shutil.copy2(zipped_files_path, copied_zipped_documents_path) + + zipped_files_folder = os.path.dirname(zipped_files_path) + jsonl_file_path = os.path.join(zipped_files_folder, "target_docs.jsonl") + if os.path.exists(jsonl_file_path): + copied_jsonl_path = os.path.join(test_output_folder, "target_docs.jsonl") + shutil.copy2(jsonl_file_path, copied_jsonl_path) + + return test_output_folder, questions + + +def _process_question(question_data: dict, config: dict, question_number: int) -> dict: + query = question_data["question"] + context_data_list, answer = get_answer_from_query( + query=query, + only_retrieve_docs=config["only_retrieve_docs"], + env_name=config["env_name"], + ) + print(f"On question number {question_number}") + print(f"query: {query}") + + if not context_data_list: + print("No answer or context found") + else: + print(f"answer: {answer[:50]}...") + print(f"{len(context_data_list)} context docs found") + print("\n") + + output = { + "question_data": question_data, + "answer": answer, + "context_data_list": context_data_list, + } + + return output + + +def _process_and_write_query_results(config: dict) -> None: + start_time = time.time() + test_output_folder, questions = _initialize_files(config) + print("saving test results to folder:", test_output_folder) + + if config["limit"] is not None: + questions = questions[: config["limit"]] + + # Use multiprocessing to process questions + with multiprocessing.Pool() as pool: + results = pool.starmap( + _process_question, + [(question, config, i + 1) for i, question in enumerate(questions)], + ) + + _populate_results_file(test_output_folder, results) + + invalid_answer_count = 0 + for result in results: + if len(result["context_data_list"]) == 0: + invalid_answer_count += 1 + + _update_metadata_file(test_output_folder, invalid_answer_count) + + if invalid_answer_count: + print(f"Warning: {invalid_answer_count} questions failed!") + print("Suggest restarting the vespa container and rerunning") + + time_to_finish = time.time() - start_time + minutes, seconds = divmod(int(time_to_finish), 60) + print( + f"Took {minutes:02d}:{seconds:02d} to ask and answer {len(results)} questions" + ) + print("saved test results to folder:", test_output_folder) + + +def run_qa_test_and_save_results(env_name: str = "") -> None: + current_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(current_dir, "search_test_config.yaml") + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + if not isinstance(config, dict): + raise TypeError("config must be a dictionary") + + if not env_name: + env_name = config["environment_name"] + + config["env_name"] = env_name + _process_and_write_query_results(config) + + +if __name__ 
== "__main__": + """ + To run a different set of questions, update the questions_file in search_test_config.yaml + If there is more than one instance of Danswer running, specify the env_name in search_test_config.yaml + """ + run_qa_test_and_save_results() diff --git a/backend/tests/regression/answer_quality/sample_questions.yaml b/backend/tests/regression/answer_quality/sample_questions.yaml deleted file mode 100644 index 33bedbe9cf2..00000000000 --- a/backend/tests/regression/answer_quality/sample_questions.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# This YAML file contains regression questions for Danswer. -# The sources mentioned are the same ones to power the DanswerBot for the community's use -# The regression flow assumes the data from the sources listed are already indexed - -metadata: - version: v0.0.1 - date: 2023-09-10 - sources: - - name: web - detail: https://www.danswer.ai/ - - name: web - detail: https://docs.danswer.dev/ - - name: github issues - detail: danswer-ai/danswer - - name: github pull-requests - detail: danswer-ai/danswer - - name: slack - workspace: danswer.slack.com - - name: file - detail: Markdown files from Danswer repo - -questions: - - id: 1 - question: "What is Danswer?" - expected_answer: "Danswer is an open source question-answering system." - notes: "This comes directly from the docs, the actual answer should be more informative" - - - id: 2 - question: "What is Danswer licensed under?" - expected_answer: "Danswer is MIT licensed" - notes: "This info can be found in many places" - - - id: 3 - question: "What are the required variables to set to use GPT-4?" - expected_answer: "Set the environment variables INTERNAL_MODEL_VERSION=openai-chat-completion and GEN_AI_MODEL_VERSION=gpt-4" - notes: "Two env vars are must have, the third (the key) is optional" - - - id: 4 - question: "Why might I want to use the deberta model for QnA?" - expected_answer: "This kind of model can run on CPU and are less likely to produce hallucinations" - notes: "https://docs.danswer.dev/gen_ai_configs/transformers, this is a pretty hard question" - - - id: 5 - question: "What auth related tokens do I need for BookStack?" - expected_answer: "You will need the API Token ID and the API Token Secret" - notes: "https://docs.danswer.dev/connectors/bookstack" - - - id: 6 - question: "ValueError: invalid literal for int() with base 10" - expected_answer: "This was a bug that was fixed shortly after the issue was filed. Try updating the code." - notes: "This question is in Github Issue #290" - - - id: 7 - question: "Is there support for knowledge sets or document sets?" - expected_answer: "This was requested and approved however it is not clear if the feature is implemented yet." - notes: "This question is in Github Issue #338" - - - id: 8 - question: "nginx returning 502" - expected_answer: "Google OAuth must be configured for Danswer backend to work. A PR was created to fix it" - notes: "This question is in Github Issue #260" - - - id: 9 - question: "Why isn't GPT4All enabled by default" - expected_answer: "There is no recent version of GPT4All that is compatible with M1 Mac." - notes: "This question is in Github Issue #232 but also mentioned in several other places" - - - id: 10 - question: "Why isn't GPT4All enabled by default" - expected_answer: "There is no recent version of GPT4All that is compatible with M1 Mac." 
- notes: "This question is in Github Issue #232 but also mentioned in several other places" - - - id: 11 - question: "Why are the models warmed up on server start" - expected_answer: "This ensures that the first indexing isn't really slow." - notes: "This is in Github PR #333" - - - id: 12 - question: "Why are the models warmed up on server start" - expected_answer: "This ensures that the first indexing isn't really slow." - notes: "This is in Github PR #333" - - - id: 13 - question: "What text from the Alation Connector is used to generate the docs?" - expected_answer: "Articles are used with the body contents. Schemas, Tables, and Columns use Description" - notes: "This is in Github PR #161" - - - id: 14 - question: "Does Danswer support PDFs in Google Drive?" - expected_answer: "Yes" - notes: "This question is in Slack, if the message expires due to using free slack version, the info may be gone as well" - - - id: 15 - question: "I deleted a connector in Danswer but some deleted docs are still showing in search" - expected_answer: "The issue was fixed via a code change, it should go away after pulling the latest code" - notes: "This question is in Slack, if the message expires due to using free slack version, the info may be gone as well" diff --git a/backend/tests/regression/answer_quality/search_quality_test.py b/backend/tests/regression/answer_quality/search_quality_test.py deleted file mode 100644 index 06da3995354..00000000000 --- a/backend/tests/regression/answer_quality/search_quality_test.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -from datetime import datetime -from types import SimpleNamespace - -import yaml - -from tests.regression.answer_quality.cli_utils import cleanup_docker -from tests.regression.answer_quality.cli_utils import manage_data_directories -from tests.regression.answer_quality.cli_utils import set_env_variables -from tests.regression.answer_quality.cli_utils import start_docker_compose -from tests.regression.answer_quality.cli_utils import switch_to_branch -from tests.regression.answer_quality.file_uploader import upload_test_files -from tests.regression.answer_quality.relari import answer_relari_questions - - -def load_config(config_filename: str) -> SimpleNamespace: - current_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(current_dir, config_filename) - with open(config_path, "r") as file: - return SimpleNamespace(**yaml.safe_load(file)) - - -def main() -> None: - config = load_config("search_test_config.yaml") - if config.existing_test_suffix: - run_suffix = config.existing_test_suffix - print("launching danswer with existing data suffix:", run_suffix) - else: - run_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S") - print("run_suffix:", run_suffix) - - set_env_variables( - config.model_server_ip, - config.model_server_port, - config.use_cloud_gpu, - config.llm, - ) - relari_output_folder_path = manage_data_directories( - run_suffix, config.output_folder, config.use_cloud_gpu - ) - if config.branch: - switch_to_branch(config.branch) - - start_docker_compose(run_suffix, config.launch_web_ui, config.use_cloud_gpu) - - if not config.existing_test_suffix: - upload_test_files(config.zipped_documents_file, run_suffix) - - answer_relari_questions( - config.questions_file, relari_output_folder_path, run_suffix, config.limit - ) - - if config.clean_up_docker_containers: - cleanup_docker(run_suffix) - - -if __name__ == "__main__": - main() diff --git a/backend/tests/regression/answer_quality/search_test_config.yaml.template 
b/backend/tests/regression/answer_quality/search_test_config.yaml.template index 47310a3c373..eb813df57f3 100644 --- a/backend/tests/regression/answer_quality/search_test_config.yaml.template +++ b/backend/tests/regression/answer_quality/search_test_config.yaml.template @@ -10,15 +10,15 @@ zipped_documents_file: "~/sampledocs.zip" # Path to the YAML file containing sample questions questions_file: "~/sample_questions.yaml" -# Git branch to use (null means use current branch as is) -branch: null - -# Whether to remove Docker containers after the test -clean_up_docker_containers: true +# Git commit SHA to use (null means use current code as is) +commit_sha: null # Whether to launch a web UI for the test launch_web_ui: false +# Only retrieve documents, not LLM response +only_retrieve_docs: false + # Whether to use a cloud GPU for processing use_cloud_gpu: false @@ -28,8 +28,8 @@ model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE" # Port of the model server (placeholder) model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE" -# Suffix for existing test results (empty string means no suffix) -existing_test_suffix: "" +# Name for existing testing env (empty string uses default ports) +environment_name: "" # Limit on number of tests to run (null means no limit) limit: null diff --git a/backend/tests/regression/search_quality/eval_search.py b/backend/tests/regression/search_quality/eval_search.py deleted file mode 100644 index d20c685dc27..00000000000 --- a/backend/tests/regression/search_quality/eval_search.py +++ /dev/null @@ -1,251 +0,0 @@ -import argparse -import builtins -import json -from contextlib import contextmanager -from typing import Any -from typing import TextIO - -from sqlalchemy.orm import Session - -from danswer.db.engine import get_sqlalchemy_engine -from danswer.llm.answering.prune_and_merge import reorder_sections -from danswer.llm.factory import get_default_llms -from danswer.search.models import InferenceSection -from danswer.search.models import RerankMetricsContainer -from danswer.search.models import RetrievalMetricsContainer -from danswer.search.models import SearchRequest -from danswer.search.pipeline import SearchPipeline -from danswer.utils.callbacks import MetricsHander - - -engine = get_sqlalchemy_engine() - - -@contextmanager -def redirect_print_to_file(file: TextIO) -> Any: - original_print = builtins.print - - def new_print(*args: Any, **kwargs: Any) -> Any: - kwargs["file"] = file - original_print(*args, **kwargs) - - builtins.print = new_print - - try: - yield - finally: - builtins.print = original_print - - -def read_json(file_path: str) -> dict: - with open(file_path, "r") as file: - return json.load(file) - - -def word_wrap(s: str, max_line_size: int = 100, prepend_tab: bool = True) -> str: - words = s.split() - - current_line: list[str] = [] - result_lines: list[str] = [] - current_length = 0 - for word in words: - if len(word) > max_line_size: - if current_line: - result_lines.append(" ".join(current_line)) - current_line = [] - current_length = 0 - - result_lines.append(word) - continue - - if current_length + len(word) + len(current_line) > max_line_size: - result_lines.append(" ".join(current_line)) - current_line = [] - current_length = 0 - - current_line.append(word) - current_length += len(word) - - if current_line: - result_lines.append(" ".join(current_line)) - - return "\t" + "\n\t".join(result_lines) if prepend_tab else "\n".join(result_lines) - - -def get_search_results( - query: str, -) -> tuple[ - list[InferenceSection], - RetrievalMetricsContainer | None, - 
RerankMetricsContainer | None, -]: - retrieval_metrics = MetricsHander[RetrievalMetricsContainer]() - rerank_metrics = MetricsHander[RerankMetricsContainer]() - - with Session(get_sqlalchemy_engine()) as db_session: - llm, fast_llm = get_default_llms() - search_pipeline = SearchPipeline( - search_request=SearchRequest( - query=query, - ), - user=None, - llm=llm, - fast_llm=fast_llm, - db_session=db_session, - retrieval_metrics_callback=retrieval_metrics.record_metric, - rerank_metrics_callback=rerank_metrics.record_metric, - ) - - top_sections = search_pipeline.reranked_sections - llm_section_selection = search_pipeline.section_relevance_list - - return ( - reorder_sections(top_sections, llm_section_selection), - retrieval_metrics.metrics, - rerank_metrics.metrics, - ) - - -def _print_retrieval_metrics( - metrics_container: RetrievalMetricsContainer, show_all: bool = False -) -> None: - for ind, metric in enumerate(metrics_container.metrics): - if not show_all and ind >= 10: - break - - if ind != 0: - print() # for spacing purposes - print(f"\tDocument: {metric.document_id}") - section_start = metric.chunk_content_start.replace("\n", " ") - print(f"\tSection Start: {section_start}") - print(f"\tSimilarity Distance Metric: {metric.score}") - - -def _print_reranking_metrics( - metrics_container: RerankMetricsContainer, show_all: bool = False -) -> None: - # Printing the raw scores as they're more informational than post-norm/boosting - for ind, metric in enumerate(metrics_container.metrics): - if not show_all and ind >= 10: - break - - if ind != 0: - print() # for spacing purposes - print(f"\tDocument: {metric.document_id}") - section_start = metric.chunk_content_start.replace("\n", " ") - print(f"\tSection Start: {section_start}") - print(f"\tSimilarity Score: {metrics_container.raw_similarity_scores[ind]}") - - -def calculate_score( - log_prefix: str, chunk_ids: list[str], targets: list[str], max_chunks: int = 5 -) -> float: - top_ids = chunk_ids[:max_chunks] - matches = [top_id for top_id in top_ids if top_id in targets] - print(f"{log_prefix} Hits: {len(matches)}/{len(targets)}", end="\t") - return len(matches) / min(len(targets), max_chunks) - - -def main( - questions_json: str, - output_file: str, - show_details: bool, - enable_llm: bool, - stop_after: int, -) -> None: - questions_info = read_json(questions_json) - - running_retrieval_score = 0.0 - running_rerank_score = 0.0 - running_llm_filter_score = 0.0 - - with open(output_file, "w") as outfile: - with redirect_print_to_file(outfile): - print("Running Document Retrieval Test\n") - for ind, (question, targets) in enumerate(questions_info.items()): - if ind >= stop_after: - break - - print(f"\n\nQuestion: {question}") - - ( - top_sections, - retrieval_metrics, - rerank_metrics, - ) = get_search_results(query=question) - - assert retrieval_metrics is not None and rerank_metrics is not None - - retrieval_ids = [ - metric.document_id for metric in retrieval_metrics.metrics - ] - retrieval_score = calculate_score("Retrieval", retrieval_ids, targets) - running_retrieval_score += retrieval_score - print(f"Average: {running_retrieval_score / (ind + 1)}") - - rerank_ids = [metric.document_id for metric in rerank_metrics.metrics] - rerank_score = calculate_score("Rerank", rerank_ids, targets) - running_rerank_score += rerank_score - print(f"Average: {running_rerank_score / (ind + 1)}") - - llm_ids = [section.center_chunk.document_id for section in top_sections] - llm_score = calculate_score("LLM Filter", llm_ids, targets) - 
running_llm_filter_score += llm_score - print(f"Average: {running_llm_filter_score / (ind + 1)}") - - if show_details: - print("\nRetrieval Metrics:") - if retrieval_metrics is None: - print("No Retrieval Metrics Available") - else: - _print_retrieval_metrics(retrieval_metrics) - - print("\nReranking Metrics:") - if rerank_metrics is None: - print("No Reranking Metrics Available") - else: - _print_reranking_metrics(rerank_metrics) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "regression_questions_json", - type=str, - help="Path to the Questions JSON file.", - default="./tests/regression/search_quality/test_questions.json", - nargs="?", - ) - parser.add_argument( - "--output_file", - type=str, - help="Path to the output results file.", - default="./tests/regression/search_quality/regression_results.txt", - ) - parser.add_argument( - "--show_details", - action="store_true", - help="If set, show details of the retrieved chunks.", - default=False, - ) - parser.add_argument( - "--enable_llm", - action="store_true", - help="If set, use LLM chunk filtering (this can get very expensive).", - default=False, - ) - parser.add_argument( - "--stop_after", - type=int, - help="Stop processing after this many iterations.", - default=100, - ) - args = parser.parse_args() - - main( - args.regression_questions_json, - args.output_file, - args.show_details, - args.enable_llm, - args.stop_after, - ) diff --git a/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py b/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py new file mode 100644 index 00000000000..92bccaa050d --- /dev/null +++ b/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py @@ -0,0 +1,59 @@ +from unittest.mock import Mock +from unittest.mock import patch + +import pytest +from requests import HTTPError + +from danswer.connectors.confluence.rate_limit_handler import ( + make_confluence_call_handle_rate_limit, +) + + +@pytest.fixture +def mock_confluence_call() -> Mock: + return Mock() + + +@pytest.mark.parametrize( + "status_code,text,retry_after", + [ + (429, "Rate limit exceeded", "5"), + (200, "Rate limit exceeded", None), + (429, "Some other error", "5"), + ], +) +def test_rate_limit_handling( + mock_confluence_call: Mock, status_code: int, text: str, retry_after: str | None +) -> None: + with patch("time.sleep") as mock_sleep: + mock_confluence_call.side_effect = [ + HTTPError( + response=Mock( + status_code=status_code, + text=text, + headers={"Retry-After": retry_after} if retry_after else {}, + ) + ), + ] * 2 + ["Success"] + + handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call) + result = handled_call() + + assert result == "Success" + assert mock_confluence_call.call_count == 3 + assert mock_sleep.call_count == 2 + if retry_after: + mock_sleep.assert_called_with(int(retry_after)) + + +def test_non_rate_limit_error(mock_confluence_call: Mock) -> None: + mock_confluence_call.side_effect = HTTPError( + response=Mock(status_code=500, text="Internal Server Error") + ) + + handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call) + + with pytest.raises(HTTPError): + handled_call() + + assert mock_confluence_call.call_count == 1 diff --git a/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py index 260a5619fd1..2a2c841a466 100644 --- a/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py +++ 
b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py @@ -115,27 +115,20 @@ def test_mediawiki_connector_recurse_depth() -> None: hostname = "wikipedia.org" categories: list[str] = [] pages = ["Test Page"] - connector_name = "Test Connector" # Recurse depth less than -1 raises ValueError with pytest.raises(ValueError): recurse_depth = -2 - wiki.MediaWikiConnector( - hostname, categories, pages, recurse_depth, connector_name - ) + wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth) # Recurse depth of -1 gets parsed as `True` recurse_depth = -1 - connector = wiki.MediaWikiConnector( - hostname, categories, pages, recurse_depth, connector_name - ) + connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth) assert connector.recurse_depth is True # Recurse depth of 0 or greater gets parsed as an integer recurse_depth = 0 - connector = wiki.MediaWikiConnector( - hostname, categories, pages, recurse_depth, connector_name - ) + connector = wiki.MediaWikiConnector(hostname, categories, pages, recurse_depth) assert connector.recurse_depth == recurse_depth diff --git a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py index 9a21fc4db41..d3974fe47ab 100644 --- a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py +++ b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py @@ -114,6 +114,7 @@ def test_fuzzy_match_quotes_to_docs() -> None: }, blurb="anything", semantic_identifier="anything", + title="whatever", section_continuation=False, recency_bias=1, boost=0, @@ -131,6 +132,7 @@ def test_fuzzy_match_quotes_to_docs() -> None: source_links={0: "doc 1 base", 36: "2nd line link", 82: "last link"}, blurb="whatever", semantic_identifier="whatever", + title="whatever", section_continuation=False, recency_bias=1, boost=0, diff --git a/backend/tests/unit/danswer/indexing/test_chunker.py b/backend/tests/unit/danswer/indexing/test_chunker.py new file mode 100644 index 00000000000..f3a72fe17a3 --- /dev/null +++ b/backend/tests/unit/danswer/indexing/test_chunker.py @@ -0,0 +1,51 @@ +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.indexing.chunker import Chunker +from danswer.indexing.embedder import DefaultIndexingEmbedder + + +def test_chunk_document() -> None: + short_section_1 = "This is a short section." + long_section = ( + "This is a long section that should be split into multiple chunks. " * 100 + ) + short_section_2 = "This is another short section." + short_section_3 = "This is another short section again." + short_section_4 = "Final short section." 
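+ # The long section is sized so the chunker has to split it across multiple chunks,
+ # while the short sections should be packed together with their neighbors (see the assertions below)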
+ semantic_identifier = "Test Document" + + document = Document( + id="test_doc", + source=DocumentSource.WEB, + semantic_identifier=semantic_identifier, + metadata={"tags": ["tag1", "tag2"]}, + doc_updated_at=None, + sections=[ + Section(text=short_section_1, link="link1"), + Section(text=short_section_2, link="link2"), + Section(text=long_section, link="link3"), + Section(text=short_section_3, link="link4"), + Section(text=short_section_4, link="link5"), + ], + ) + + embedder = DefaultIndexingEmbedder( + model_name="intfloat/e5-base-v2", + normalize=True, + query_prefix=None, + passage_prefix=None, + ) + + chunker = Chunker( + tokenizer=embedder.embedding_model.tokenizer, + enable_multipass=False, + ) + chunks = chunker.chunk(document) + + assert len(chunks) == 5 + assert short_section_1 in chunks[0].content + assert short_section_3 in chunks[-1].content + assert short_section_4 in chunks[-1].content + assert "tag1" in chunks[0].metadata_suffix_keyword + assert "tag2" in chunks[0].metadata_suffix_semantic diff --git a/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py b/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py index 87d8e0d32f4..473ccf2451a 100644 --- a/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py +++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py @@ -257,6 +257,35 @@ def process_text( "[[1]](https://0.com)[[2]]()t]", ["doc_0", "doc_1"], ), + ( + "Citations with extraneous citations", + [ + "[[1]](https://0.com) Citation", + " at ", + "the beginning. ", + "[", + "3", + "]", + " In the mid", + "dle. At the end ", + "[", + "5", + "]", + ".", + ], + "[[1]](https://0.com) Citation at the beginning. [[2]]() In the middle. At the end [[3]](https://2.com).", + ["doc_0", "doc_1", "doc_2"], + ), + ( + "Citations with extraneous citations, split up", + [ + "[[1]](", + "https://0.com) Citation at ", + "the beginning. ", + ], + "[[1]](https://0.com) Citation at the beginning. ", + ["doc_0"], + ), ], ) def test_citation_extraction( diff --git a/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py new file mode 100644 index 00000000000..e80c5c4f657 --- /dev/null +++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py @@ -0,0 +1,351 @@ +import json +from datetime import datetime + +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import DanswerQuotes +from danswer.chat.models import LlmDoc +from danswer.configs.constants import DocumentSource +from danswer.llm.answering.stream_processing.quotes_processing import ( + process_model_tokens, +) + +mock_docs = [ + LlmDoc( + document_id=f"doc_{int(id/2)}", + content="Document is a doc", + blurb=f"Document #{id}", + semantic_identifier=f"Doc {id}", + source_type=DocumentSource.WEB, + metadata={}, + updated_at=datetime.now(), + link=f"https://{int(id/2)}.com" if int(id / 2) % 2 == 0 else None, + source_links={0: "https://mintlify.com/docs/settings/broken-links"}, + ) + for id in range(10) +] + + +tokens_with_quotes = [ + "{", + "\n ", + '"answer": "Yes', + ", Danswer allows", + " customized prompts. 
This", + " feature", + " is currently being", + " developed and implemente", + "d to", + " improve", + " the accuracy", + " of", + " Language", + " Models (", + "LL", + "Ms) for", + " different", + " companies", + ".", + " The custom", + "ized prompts feature", + " woul", + "d allow users to ad", + "d person", + "alized prom", + "pts through", + " an", + " interface or", + " metho", + "d,", + " which would then be used to", + " train", + " the LLM.", + " This enhancement", + " aims to make", + " Danswer more", + " adaptable to", + " different", + " business", + " contexts", + " by", + " tail", + "oring it", + " to the specific language", + " an", + "d terminology", + " used within", + " a", + " company.", + " Additionally", + ",", + " Danswer already", + " supports creating", + " custom AI", + " Assistants with", + " different", + " prom", + "pts and backing", + " knowledge", + " sets", + ",", + " which", + " is", + " a form", + " of prompt", + " customization. However, it", + "'s important to nLogging Details LiteLLM-Success Call: Noneote that some", + " aspects", + " of prompt", + " customization,", + " such as for", + " Sl", + "ack", + "b", + "ots, may", + " still", + " be in", + " development or have", + ' limitations.",', + '\n "quotes": [', + '\n "We', + " woul", + "d like to ad", + "d customized prompts for", + " different", + " companies to improve the accuracy of", + " Language", + " Model", + " (LLM)", + '.",\n "A', + " new", + " feature that", + " allows users to add personalize", + "d prompts.", + " This would involve", + " creating", + " an interface or method for", + " users to input", + " their", + " own", + " prom", + "pts,", + " which would then be used to", + ' train the LLM.",', + '\n "Create', + " custom AI Assistants with", + " different prompts and backing knowledge", + ' sets.",', + '\n "This', + " PR", + " fixes", + " https", + "://github.com/dan", + "swer-ai/dan", + "swer/issues/1", + "584", + " by", + " setting", + " the system", + " default", + " prompt for", + " sl", + "ackbots const", + "rained by", + " ", + "document sets", + ".", + " It", + " probably", + " isn", + "'t ideal", + " -", + " it", + " might", + " be pref", + "erable to be", + " able to select", + " a prompt for", + " the", + " slackbot from", + " the", + " admin", + " panel", + " -", + " but it sol", + "ves the immediate problem", + " of", + " the slack", + " listener", + " cr", + "ashing when", + " configure", + "d this", + ' way."\n ]', + "\n}", + "", +] + + +def test_process_model_tokens_answer() -> None: + gen = process_model_tokens(tokens=iter(tokens_with_quotes), context_docs=mock_docs) + + s_json = "".join(tokens_with_quotes) + j = json.loads(s_json) + expected_answer = j["answer"] + actual = "" + for o in gen: + if isinstance(o, DanswerAnswerPiece): + if o.answer_piece: + actual += o.answer_piece + + assert expected_answer == actual + + +def test_simple_json_answer() -> None: + tokens = [ + "```", + "json", + "\n", + "{", + '"answer": "This is a simple ', + "answer.", + '",\n"', + 'quotes": []', + "\n}", + "\n", + "```", + ] + gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + + expected_answer = "This is a simple answer." 
+ actual = "".join( + o.answer_piece + for o in gen + if isinstance(o, DanswerAnswerPiece) and o.answer_piece + ) + + assert expected_answer == actual + + +def test_json_answer_with_quotes() -> None: + tokens = [ + "```", + "json", + "\n", + "{", + '"answer": "This ', + "is a ", + "split ", + "answer.", + '",\n"', + 'quotes": []', + "\n}", + "\n", + "```", + ] + gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + + expected_answer = "This is a split answer." + actual = "".join( + o.answer_piece + for o in gen + if isinstance(o, DanswerAnswerPiece) and o.answer_piece + ) + + assert expected_answer == actual + + +def test_json_answer_split_tokens() -> None: + tokens = [ + "```", + "json", + "\n", + "{", + '\n"', + 'answer": "This ', + "is a ", + "split ", + "answer.", + '",\n"', + 'quotes": []', + "\n}", + "\n", + "```", + ] + gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + + expected_answer = "This is a split answer." + actual = "".join( + o.answer_piece + for o in gen + if isinstance(o, DanswerAnswerPiece) and o.answer_piece + ) + + assert expected_answer == actual + + +def test_lengthy_prefixed_json_with_quotes() -> None: + tokens = [ + "This is my response in json\n\n", + "```", + "json", + "\n", + "{", + '"answer": "This is a simple ', + "answer.", + '",\n"', + 'quotes": ["Document"]', + "\n}", + "\n", + "```", + ] + + gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + + actual_answer = "" + actual_count = 0 + for o in gen: + if isinstance(o, DanswerAnswerPiece): + if o.answer_piece: + actual_answer += o.answer_piece + continue + + if isinstance(o, DanswerQuotes): + for q in o.quotes: + assert q.quote == "Document" + actual_count += 1 + assert "This is a simple answer." == actual_answer + assert 1 == actual_count + + +def test_prefixed_json_with_quotes() -> None: + tokens = [ + "```", + "json", + "\n", + "{", + '"answer": "This is a simple ', + "answer.", + '",\n"', + 'quotes": ["Document"]', + "\n}", + "\n", + "```", + ] + + gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + + actual_answer = "" + actual_count = 0 + for o in gen: + if isinstance(o, DanswerAnswerPiece): + if o.answer_piece: + actual_answer += o.answer_piece + continue + + if isinstance(o, DanswerQuotes): + for q in o.quotes: + assert q.quote == "Document" + actual_count += 1 + + assert "This is a simple answer." 
== actual_answer + assert 1 == actual_count diff --git a/backend/tests/unit/danswer/llm/answering/test_prune_and_merge.py b/backend/tests/unit/danswer/llm/answering/test_prune_and_merge.py index 1782f3edbbd..9d28339a1f5 100644 --- a/backend/tests/unit/danswer/llm/answering/test_prune_and_merge.py +++ b/backend/tests/unit/danswer/llm/answering/test_prune_and_merge.py @@ -24,6 +24,7 @@ def create_inference_chunk( chunk_id=chunk_id, document_id=document_id, semantic_identifier=f"{document_id}_{chunk_id}", + title="whatever", blurb=f"{document_id}_{chunk_id}", content=content, source_links={0: "fake_link"}, diff --git a/backend/throttle.ctrl b/backend/throttle.ctrl index 03aa9179535..e69de29bb2d 100644 --- a/backend/throttle.ctrl +++ b/backend/throttle.ctrl @@ -1 +0,0 @@ -f1f2 1 1718910083.03085 wikipedia:en \ No newline at end of file diff --git a/deployment/docker_compose/README.md b/deployment/docker_compose/README.md index a12f22bea7d..a5f650b5303 100644 --- a/deployment/docker_compose/README.md +++ b/deployment/docker_compose/README.md @@ -8,7 +8,7 @@ For general information, please read the instructions in this [README](https://g This part is elaborated precisely in this [README](https://github.com/danswer-ai/danswer/blob/main/deployment/README.md) in section *Docker Compose*. If you have any questions, please feel free to open an issue or get in touch in Slack for support. ## Deploy in a system with GPU support -Running Model servers with GPU support while indexing and querying can result in significant improvements in performance. This is highly recommended if you have access to resources. Currently, Danswer offloads embedding model and tokenizers to the GPU VRAM and the size needed depends on chosen embedding model. Default embedding models `intfloat/e5-base-v2` takes up about 1GB of VRAM and since we need this for inference and embedding pipeline, you would need roughly 2GB of VRAM. +Running Model servers with GPU support while indexing and querying can result in significant performance improvements. This is highly recommended if you have access to the resources. Currently, Danswer offloads the embedding model and tokenizers to GPU VRAM, and the amount needed depends on the chosen embedding model. For example, the embedding model `nomic-ai/nomic-embed-text-v1` takes up about 1GB of VRAM. That means running this model for both the inference and indexing pipelines requires roughly 2GB of VRAM.
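For reference (not part of this PR's changes), GPU access for the model server containers is normally requested through Compose's standard `deploy.resources.reservations.devices` mechanism once the NVIDIA Container Toolkit is installed on the host. A minimal sketch follows; the service name matches the compose files in this PR, but the `count` value and placement are illustrative assumptions, not the project's required configuration:

```yaml
services:
  inference_model_server:
    image: danswer/danswer-model-server:${IMAGE_TAG:-latest}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia      # requires nvidia-container-toolkit on the host
              count: 1            # or "all" to expose every available GPU
              capabilities: [gpu]
```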
### Setup To be able to use NVIDIA runtime, following is mandatory: diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 0f35829230e..636879497a2 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -1,7 +1,7 @@ version: "3" services: api_server: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -19,7 +19,7 @@ services: environment: # Auth Settings - AUTH_TYPE=${AUTH_TYPE:-disabled} - - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400} + - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-} - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-} - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} @@ -33,6 +33,7 @@ services: - OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-} - OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-} - OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-} + - TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-} # Gen AI Settings - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} @@ -44,13 +45,13 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} - BING_API_KEY=${BING_API_KEY:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} # if set, allows for the use of the token budget system - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-} # Enables the use of bedrock models @@ -74,8 +75,7 @@ services: - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} - - ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-} - - ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-} + - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-} - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} # Leave this on pretty please? Nothing sensitive is collected! 
@@ -89,7 +89,9 @@ services: # (time spent on finding the right docs + time spent fetching summaries from disk) - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-} - + - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-} + - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-} + # Enterprise Edition only - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-} @@ -104,7 +106,7 @@ services: max-file: "6" background: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -128,7 +130,6 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} @@ -206,7 +207,7 @@ services: max-file: "6" web_server: - image: danswer/danswer-web-server:latest + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} build: context: ../../web dockerfile: Dockerfile @@ -216,6 +217,7 @@ services: - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + - NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-} # Enterprise Edition only - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-} @@ -228,12 +230,13 @@ services: - INTERNAL_URL=http://api_server:8080 - WEB_DOMAIN=${WEB_DOMAIN:-} - THEME_IS_DARK=${THEME_IS_DARK:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} # Enterprise Edition only - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} inference_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -259,7 +262,7 @@ services: max-file: "6" indexing_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -278,7 +281,7 @@ services: - LOG_LEVEL=${LOG_LEVEL:-info} volumes: # Not necessary, this is just to reduce download time during startup - - model_cache_huggingface:/root/.cache/huggingface/ + - indexing_huggingface_model_cache:/root/.cache/huggingface/ logging: driver: json-file options: @@ -287,6 +290,7 @@ services: relational_db: image: postgres:15.2-alpine + command: -c 'max_connections=150' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} @@ -339,9 +343,10 @@ services: command: > /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" - + volumes: db_volume: - vespa_volume: - # Created by the container itself + vespa_volume: # Created by the container itself + model_cache_huggingface: + indexing_huggingface_model_cache: diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index a46dd00a38f..9079bd10dff 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ 
b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -1,7 +1,7 @@ version: '3' services: api_server: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -19,7 +19,7 @@ services: environment: # Auth Settings - AUTH_TYPE=${AUTH_TYPE:-disabled} - - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400} + - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-} - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-} - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} @@ -30,6 +30,7 @@ services: - SMTP_USER=${SMTP_USER:-} - SMTP_PASS=${SMTP_PASS:-} - EMAIL_FROM=${EMAIL_FROM:-} + - TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-} # Gen AI Settings - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} @@ -41,7 +42,7 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} @@ -70,8 +71,7 @@ services: - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} - - ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-} - - ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-} + - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-} - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} # Leave this on pretty please? Nothing sensitive is collected! 
@@ -98,7 +98,7 @@ services: background: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -122,7 +122,7 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} @@ -201,7 +201,7 @@ services: web_server: - image: danswer/danswer-web-server:latest + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} build: context: ../../web dockerfile: Dockerfile @@ -211,6 +211,7 @@ services: - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + - NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-} - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-} depends_on: - api_server @@ -225,7 +226,7 @@ services: inference_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} # for GPU support, please read installation guidelines in the README.md # bare minimum to get this working is to install nvidia-container-toolkit deploy: @@ -261,7 +262,7 @@ services: indexing_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -289,7 +290,7 @@ services: - LOG_LEVEL=${LOG_LEVEL:-info} volumes: # Not necessary, this is just to reduce download time during startup - - model_cache_huggingface:/root/.cache/huggingface/ + - indexing_huggingface_model_cache:/root/.cache/huggingface/ logging: driver: json-file options: @@ -299,6 +300,7 @@ services: relational_db: image: postgres:15.2-alpine + command: -c 'max_connections=150' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} @@ -360,3 +362,4 @@ volumes: vespa_volume: # Created by the container itself model_cache_huggingface: + indexing_huggingface_model_cache: diff --git a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml index 49024b6e366..250012bd7f5 100644 --- a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml +++ b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml @@ -1,7 +1,7 @@ version: "3" services: api_server: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -30,7 +30,7 @@ services: max-file: "6" background: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -58,7 +58,7 @@ services: max-file: "6" web_server: - image: danswer/danswer-web-server:latest + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} build: context: ../../web dockerfile: Dockerfile @@ -68,6 +68,7 @@ services: - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - 
NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + - NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-} - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-} depends_on: - api_server @@ -83,7 +84,7 @@ services: max-file: "6" inference_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -109,7 +110,7 @@ services: max-file: "6" indexing_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -128,7 +129,7 @@ services: - LOG_LEVEL=${LOG_LEVEL:-info} volumes: # Not necessary, this is just to reduce download time during startup - - model_cache_huggingface:/root/.cache/huggingface/ + - indexing_huggingface_model_cache:/root/.cache/huggingface/ logging: driver: json-file options: @@ -137,6 +138,7 @@ services: relational_db: image: postgres:15.2-alpine + command: -c 'max_connections=150' restart: always # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file env_file: @@ -199,3 +201,4 @@ volumes: vespa_volume: # Created by the container itself model_cache_huggingface: + indexing_huggingface_model_cache: diff --git a/deployment/docker_compose/docker-compose.prod.yml b/deployment/docker_compose/docker-compose.prod.yml index 9c5d7f8c1f6..e2c2b072f93 100644 --- a/deployment/docker_compose/docker-compose.prod.yml +++ b/deployment/docker_compose/docker-compose.prod.yml @@ -1,7 +1,7 @@ version: "3" services: api_server: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -30,7 +30,7 @@ services: max-file: "6" background: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -58,7 +58,7 @@ services: max-file: "6" web_server: - image: danswer/danswer-web-server:latest + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} build: context: ../../web dockerfile: Dockerfile @@ -84,6 +84,7 @@ services: relational_db: image: postgres:15.2-alpine + command: -c 'max_connections=150' restart: always # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file env_file: @@ -97,7 +98,7 @@ services: max-file: "6" inference_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -123,7 +124,7 @@ services: max-file: "6" indexing_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -142,7 +143,7 @@ services: - LOG_LEVEL=${LOG_LEVEL:-info} volumes: # Not necessary, this is just to reduce download time during startup - - model_cache_huggingface:/root/.cache/huggingface/ + - indexing_huggingface_model_cache:/root/.cache/huggingface/ logging: driver: json-file options: @@ -217,3 +218,4 @@ volumes: vespa_volume: # Created by the container itself model_cache_huggingface: + indexing_huggingface_model_cache: diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index 41eb50eaf8c..efb387eb083 100644 --- 
a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -1,7 +1,7 @@ version: '3' services: api_server: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -35,7 +35,7 @@ services: background: - image: danswer/danswer-backend:latest + image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile @@ -65,7 +65,7 @@ services: web_server: - image: danswer/danswer-web-server:latest + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} build: context: ../../web dockerfile: Dockerfile @@ -75,6 +75,7 @@ services: - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + - NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-} # Enterprise Edition only - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-} @@ -93,7 +94,7 @@ services: inference_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -117,7 +118,7 @@ services: indexing_model_server: - image: danswer/danswer-model-server:latest + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile.model_server @@ -143,6 +144,7 @@ services: relational_db: image: postgres:15.2-alpine + command: -c 'max_connections=150' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} @@ -179,9 +181,8 @@ services: - web_server environment: - DOMAIN=localhost - ports: - - "80:80" - - "3000:80" # allow for localhost:3000 usage, since that is the norm + ports: + - "${NGINX_PORT:-3000}:80" # allow for localhost:3000 usage, since that is the norm volumes: - ../data/nginx:/etc/nginx/conf.d logging: diff --git a/deployment/docker_compose/env.multilingual.template b/deployment/docker_compose/env.multilingual.template index e6059c6ae7d..e218305153f 100644 --- a/deployment/docker_compose/env.multilingual.template +++ b/deployment/docker_compose/env.multilingual.template @@ -24,18 +24,13 @@ NORMALIZE_EMBEDDINGS="True" # Use LLM to determine if chunks are relevant to the query # May not work well for languages that do not have much training data in the LLM training set # If using a common language like Spanish, French, Chinese, etc. 
this can be kept turned on -DISABLE_LLM_CHUNK_FILTER="True" - -# The default reranking models are English first -# There are no great quality French/English reranking models currently so turning this off -ENABLE_RERANKING_ASYNC_FLOW="False" -ENABLE_RERANKING_REAL_TIME_FLOW="False" +DISABLE_LLM_DOC_RELEVANCE="True" # Enables fine-grained embeddings for better retrieval # At the cost of indexing speed (~5x slower), query time is same speed # Since reranking is turned off and multilingual retrieval is generally harder # it is advised to turn this one on -ENABLE_MINI_CHUNK="True" +ENABLE_MULTIPASS_INDEXING="True" # Using a stronger LLM will help with multilingual tasks # Since documents may be in multiple languages, and there are additional instructions to respond diff --git a/deployment/docker_compose/env.prod.template b/deployment/docker_compose/env.prod.template index d356995df93..818bd1ed1bf 100644 --- a/deployment/docker_compose/env.prod.template +++ b/deployment/docker_compose/env.prod.template @@ -57,8 +57,8 @@ SECRET= #SAML_CONF_DIR= -# How long before user needs to reauthenticate, default to 1 day. (cookie expiration time) -SESSION_EXPIRE_TIME_SECONDS=86400 +# How long before user needs to reauthenticate, default to 7 days. (cookie expiration time) +SESSION_EXPIRE_TIME_SECONDS=604800 # Use the below to specify a list of allowed user domains, only checked if user Auth is turned on diff --git a/deployment/helm/templates/background-deployment.yaml b/deployment/helm/templates/background-deployment.yaml index 3cd65a99af4..f4ac25fe0bc 100644 --- a/deployment/helm/templates/background-deployment.yaml +++ b/deployment/helm/templates/background-deployment.yaml @@ -46,6 +46,6 @@ spec: - configMapRef: name: {{ .Values.config.envConfigMapName }} env: - - name: ENABLE_MINI_CHUNK + - name: ENABLE_MULTIPASS_INDEXING value: "{{ .Values.background.enableMiniChunk }}" {{- include "danswer-stack.envSecrets" . 
| nindent 12}} diff --git a/deployment/helm/values.yaml b/deployment/helm/values.yaml index bb41a7511b9..2167b70438b 100644 --- a/deployment/helm/values.yaml +++ b/deployment/helm/values.yaml @@ -402,7 +402,7 @@ configMap: GEN_AI_MAX_TOKENS: "" QA_TIMEOUT: "60" MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_CHUNK_FILTER: "" + DISABLE_LLM_DOC_RELEVANCE: "" DISABLE_LLM_CHOOSE_SEARCH: "" DISABLE_LLM_QUERY_REPHRASE: "" # Query Options @@ -420,8 +420,7 @@ configMap: NORMALIZE_EMBEDDINGS: "" ASYM_QUERY_PREFIX: "" ASYM_PASSAGE_PREFIX: "" - ENABLE_RERANKING_REAL_TIME_FLOW: "" - ENABLE_RERANKING_ASYNC_FLOW: "" + DISABLE_RERANK_FOR_STREAMING: "" MODEL_SERVER_PORT: "" MIN_THREADS_ML_MODELS: "" # Indexing Configs diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml index 81918c147a6..907fae1c836 100644 --- a/deployment/kubernetes/env-configmap.yaml +++ b/deployment/kubernetes/env-configmap.yaml @@ -24,7 +24,7 @@ data: GEN_AI_MAX_TOKENS: "" QA_TIMEOUT: "60" MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_CHUNK_FILTER: "" + DISABLE_LLM_DOC_RELEVANCE: "" DISABLE_LLM_CHOOSE_SEARCH: "" DISABLE_LLM_QUERY_REPHRASE: "" # Query Options @@ -45,8 +45,7 @@ data: NORMALIZE_EMBEDDINGS: "" ASYM_QUERY_PREFIX: "" ASYM_PASSAGE_PREFIX: "" - ENABLE_RERANKING_REAL_TIME_FLOW: "" - ENABLE_RERANKING_ASYNC_FLOW: "" + DISABLE_RERANK_FOR_STREAMING: "" MODEL_SERVER_HOST: "inference-model-server-service" MODEL_SERVER_PORT: "" INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service" diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml index 17330204c1e..33f2200b801 100644 --- a/deployment/kubernetes/postgres-service-deployment.yaml +++ b/deployment/kubernetes/postgres-service-deployment.yaml @@ -40,6 +40,7 @@ spec: secretKeyRef: name: danswer-secrets key: postgres_password + args: ["-c", "max_connections=150"] ports: - containerPort: 5432 volumeMounts: diff --git a/examples/widget/.env.example b/examples/widget/.env.example new file mode 100644 index 00000000000..b92284bf274 --- /dev/null +++ b/examples/widget/.env.example @@ -0,0 +1,2 @@ +NEXT_PUBLIC_API_URL=https://example.danswer.ai +NEXT_PUBLIC_API_KEY=some_long_api_key_here \ No newline at end of file diff --git a/examples/widget/.eslintrc.json b/examples/widget/.eslintrc.json new file mode 100644 index 00000000000..bffb357a712 --- /dev/null +++ b/examples/widget/.eslintrc.json @@ -0,0 +1,3 @@ +{ + "extends": "next/core-web-vitals" +} diff --git a/examples/widget/.gitignore b/examples/widget/.gitignore new file mode 100644 index 00000000000..fd3dbb571a1 --- /dev/null +++ b/examples/widget/.gitignore @@ -0,0 +1,36 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.js +.yarn/install-state.gz + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# local env files +.env*.local + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/examples/widget/README.md b/examples/widget/README.md new file mode 100644 index 00000000000..cb32ecd073a --- /dev/null +++ b/examples/widget/README.md @@ -0,0 +1,70 @@ +# Danswer Chat Bot Widget +Note: The widget requires a Danswer API key, which is a paid (cloud/enterprise) feature. + +This is a code example for how you can use Danswer's APIs to build a chat bot widget for a website! 
The main code to look at can be found in `src/app/widget/Widget.tsx`. + +## Getting Started + +To get the widget working on your webpage, follow these steps: + +### 1. Install Dependencies + +Ensure you have the necessary dependencies installed. From the `examples/widget/README.md` file: +```bash +npm i +``` + + +### 2. Set Environment Variables + +Make sure to set the environment variables `NEXT_PUBLIC_API_URL` and `NEXT_PUBLIC_API_KEY` in a `.env` file at the root of your project: + +```bash +NEXT_PUBLIC_API_URL= +NEXT_PUBLIC_API_KEY= +``` + +### 3. Run the Development Server + +Start the development server to see the widget in action. + +```bash +npm run dev +``` + +Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. + +### 4. Integrate the Widget + +To integrate the widget into your webpage, you can use the `ChatWidget` component. Here’s an example of how to include it in a page component: + +```jsx +import ChatWidget from 'path/to/ChatWidget'; +function MyPage() { +return ( +
<div>
+<h1>My Webpage</h1>
+<ChatWidget />
+</div>
+); +} +export default MyPage; +``` + + +### 5. Deploy + +Once you are satisfied with the widget, you can build and start the application for production: + +```bash +npm run build +npm run start +``` + +### Custom Styling and Configuration + +If you need to customize the widget, you can modify the `ChatWidget` component in the `examples/widget/src/app/widget/Widget.tsx` file. + +By following these steps, you should be able to get the chat widget working on your webpage. + +If you want to get fancier, then take a peek at the Chat implementation within Danswer itself [here](https://github.com/danswer-ai/danswer/blob/main/web/src/app/chat/ChatPage.tsx#L82). \ No newline at end of file diff --git a/examples/widget/next.config.mjs b/examples/widget/next.config.mjs new file mode 100644 index 00000000000..4678774e6d6 --- /dev/null +++ b/examples/widget/next.config.mjs @@ -0,0 +1,4 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = {}; + +export default nextConfig; diff --git a/examples/widget/package-lock.json b/examples/widget/package-lock.json new file mode 100644 index 00000000000..bd7c54d2081 --- /dev/null +++ b/examples/widget/package-lock.json @@ -0,0 +1,5933 @@ +{ + "name": "widget", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "widget", + "version": "0.1.0", + "dependencies": { + "next": "14.2.5", + "react": "^18", + "react-dom": "^18", + "react-markdown": "^8.0.6" + }, + "devDependencies": { + "@types/node": "^20", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.4.19", + "eslint": "^8", + "eslint-config-next": "14.2.5", + "postcss": "^8.4.39", + "tailwindcss": "^3.4.6", + "typescript": "^5" + } + }, + "node_modules/@alloc/quick-lru": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", + "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@eslint-community/eslint-utils": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz", + "integrity": "sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA==", + "dev": true, + "dependencies": { + "eslint-visitor-keys": "^3.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.11.0", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.11.0.tgz", + "integrity": "sha512-G/M/tIiMrTAxEWRfLfQJMmGNX28IxBg4PBz8XqQhqUHLFI6TL2htpIB1iQCj144V5ee/JaKyT9/WZ0MGZWfA7A==", + "dev": true, + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.4.tgz", + "integrity": "sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ==", + "dev": true, + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": "^9.6.0", + "globals": "^13.19.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + 
"funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint/js": { + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz", + "integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==", + "dev": true, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, + "node_modules/@humanwhocodes/config-array": { + "version": "0.11.14", + "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz", + "integrity": "sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg==", + "deprecated": "Use @eslint/config-array instead", + "dev": true, + "dependencies": { + "@humanwhocodes/object-schema": "^2.0.2", + "debug": "^4.3.1", + "minimatch": "^3.0.5" + }, + "engines": { + "node": ">=10.10.0" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": true, + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/object-schema": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz", + "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==", + "deprecated": "Use @eslint/object-schema instead", + "dev": true + }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "dev": true, + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dev": true, + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": 
">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@next/env": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.5.tgz", + "integrity": "sha512-/zZGkrTOsraVfYjGP8uM0p6r0BDT6xWpkjdVbcz66PJVSpwXX3yNiRycxAuDfBKGWBrZBXRuK/YVlkNgxHGwmA==" + }, + "node_modules/@next/eslint-plugin-next": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-14.2.5.tgz", + "integrity": "sha512-LY3btOpPh+OTIpviNojDpUdIbHW9j0JBYBjsIp8IxtDFfYFyORvw3yNq6N231FVqQA7n7lwaf7xHbVJlA1ED7g==", + "dev": true, + "dependencies": { + "glob": "10.3.10" + } + }, + "node_modules/@next/swc-darwin-arm64": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.5.tgz", + "integrity": "sha512-/9zVxJ+K9lrzSGli1///ujyRfon/ZneeZ+v4ptpiPoOU+GKZnm8Wj8ELWU1Pm7GHltYRBklmXMTUqM/DqQ99FQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-darwin-x64": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.5.tgz", + "integrity": "sha512-vXHOPCwfDe9qLDuq7U1OYM2wUY+KQ4Ex6ozwsKxp26BlJ6XXbHleOUldenM67JRyBfVjv371oneEvYd3H2gNSA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-gnu": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.5.tgz", + "integrity": "sha512-vlhB8wI+lj8q1ExFW8lbWutA4M2ZazQNvMWuEDqZcuJJc78iUnLdPPunBPX8rC4IgT6lIx/adB+Cwrl99MzNaA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-musl": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.5.tgz", + "integrity": "sha512-NpDB9NUR2t0hXzJJwQSGu1IAOYybsfeB+LxpGsXrRIb7QOrYmidJz3shzY8cM6+rO4Aojuef0N/PEaX18pi9OA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + 
"node_modules/@next/swc-linux-x64-gnu": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.5.tgz", + "integrity": "sha512-8XFikMSxWleYNryWIjiCX+gU201YS+erTUidKdyOVYi5qUQo/gRxv/3N1oZFCgqpesN6FPeqGM72Zve+nReVXQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-x64-musl": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.5.tgz", + "integrity": "sha512-6QLwi7RaYiQDcRDSU/os40r5o06b5ue7Jsk5JgdRBGGp8l37RZEh9JsLSM8QF0YDsgcosSeHjglgqi25+m04IQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-arm64-msvc": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.5.tgz", + "integrity": "sha512-1GpG2VhbspO+aYoMOQPQiqc/tG3LzmsdBH0LhnDS3JrtDx2QmzXe0B6mSZZiN3Bq7IOMXxv1nlsjzoS1+9mzZw==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-ia32-msvc": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.5.tgz", + "integrity": "sha512-Igh9ZlxwvCDsu6438FXlQTHlRno4gFpJzqPjSIBZooD22tKeI4fE/YMRoHVJHmrQ2P5YL1DoZ0qaOKkbeFWeMg==", + "cpu": [ + "ia32" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-x64-msvc": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.5.tgz", + "integrity": "sha512-tEQ7oinq1/CjSG9uSTerca3v4AZ+dFa+4Yu6ihaG8Ud8ddqLQgFGcnwYls13H5X5CPDPZJdYxyeMui6muOLd4g==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": true, + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@rushstack/eslint-patch": { + "version": "1.10.3", + "resolved": 
"https://registry.npmjs.org/@rushstack/eslint-patch/-/eslint-patch-1.10.3.tgz", + "integrity": "sha512-qC/xYId4NMebE6w/V33Fh9gWxLgURiNYgVNObbJl2LZv0GUUItCcCqC5axQSwRaAgaxl2mELq1rMzlswaQ0Zxg==", + "dev": true + }, + "node_modules/@swc/counter": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", + "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==" + }, + "node_modules/@swc/helpers": { + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.5.tgz", + "integrity": "sha512-KGYxvIOXcceOAbEk4bi/dVLEK9z8sZ0uBB3Il5b1rhfClSpcX0yfRO0KmTkqR2cnQDymwLB+25ZyMzICg/cm/A==", + "dependencies": { + "@swc/counter": "^0.1.3", + "tslib": "^2.4.0" + } + }, + "node_modules/@types/debug": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", + "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", + "dependencies": { + "@types/ms": "*" + } + }, + "node_modules/@types/hast": { + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", + "integrity": "sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/@types/json5": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", + "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", + "dev": true + }, + "node_modules/@types/mdast": { + "version": "3.0.15", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.15.tgz", + "integrity": "sha512-LnwD+mUEfxWMa1QpDraczIn6k0Ee3SMicuYSSzS6ZYl2gKS09EClnJYGd8Du6rfc5r/GZEk5o1mRb8TaTj03sQ==", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/@types/ms": { + "version": "0.7.34", + "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz", + "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==" + }, + "node_modules/@types/node": { + "version": "20.14.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.11.tgz", + "integrity": "sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/prop-types": { + "version": "15.7.12", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz", + "integrity": "sha512-5zvhXYtRNRluoE/jAp4GVsSduVUzNWKkOZrCDBWYtE7biZywwdC2AcEzg+cSMLFRfVgeAFqpfNabiPjxFddV1Q==" + }, + "node_modules/@types/react": { + "version": "18.3.3", + "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.3.tgz", + "integrity": "sha512-hti/R0pS0q1/xx+TsI73XIqk26eBsISZ2R0wUijXIngRK9R/e7Xw/cXVxQK7R5JjW+SV4zGcn5hXjudkN/pLIw==", + "dependencies": { + "@types/prop-types": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/react-dom": { + "version": "18.3.0", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz", + "integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==", + "dev": true, + "dependencies": { + "@types/react": "*" + } + }, + "node_modules/@types/unist": { + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", + 
"integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + }, + "node_modules/@typescript-eslint/parser": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.2.0.tgz", + "integrity": "sha512-5FKsVcHTk6TafQKQbuIVkXq58Fnbkd2wDL4LB7AURN7RUOu1utVP+G8+6u3ZhEroW3DF6hyo3ZEXxgKgp4KeCg==", + "dev": true, + "dependencies": { + "@typescript-eslint/scope-manager": "7.2.0", + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/typescript-estree": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0", + "debug": "^4.3.4" + }, + "engines": { + "node": "^16.0.0 || >=18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.56.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.2.0.tgz", + "integrity": "sha512-Qh976RbQM/fYtjx9hs4XkayYujB/aPwglw2choHmf3zBjB4qOywWSdt9+KLRdHubGcoSwBnXUH2sR3hkyaERRg==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0" + }, + "engines": { + "node": "^16.0.0 || >=18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/types": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.2.0.tgz", + "integrity": "sha512-XFtUHPI/abFhm4cbCDc5Ykc8npOKBSJePY3a3s+lwumt7XWJuzP5cZcfZ610MIPHjQjNsOLlYK8ASPaNG8UiyA==", + "dev": true, + "engines": { + "node": "^16.0.0 || >=18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/typescript-estree": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.2.0.tgz", + "integrity": "sha512-cyxS5WQQCoBwSakpMrvMXuMDEbhOo9bNHHrNcEWis6XHx6KF518tkF1wBvKIn/tpq5ZpUYK7Bdklu8qY0MsFIA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0", + "debug": "^4.3.4", + "globby": "^11.1.0", + "is-glob": "^4.0.3", + "minimatch": "9.0.3", + "semver": "^7.5.4", + "ts-api-utils": "^1.0.1" + }, + "engines": { + "node": "^16.0.0 || >=18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.3.tgz", + "integrity": "sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + 
"url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.2.0.tgz", + "integrity": "sha512-c6EIQRHhcpl6+tO8EMR+kjkkV+ugUNXOmeASA1rlzkd8EPIriavpWoiEz1HR/VLhbVIdhqnV6E7JZm00cBDx2A==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.2.0", + "eslint-visitor-keys": "^3.4.1" + }, + "engines": { + "node": "^16.0.0 || >=18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@ungap/structured-clone": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", + "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==", + "dev": true + }, + "node_modules/acorn": { + "version": "8.12.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz", + "integrity": "sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/any-promise": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "dev": true + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/arg": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz", + 
"integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==", + "dev": true + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true + }, + "node_modules/aria-query": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.1.3.tgz", + "integrity": "sha512-R5iJ5lkuHybztUfuOAznmboyjWq8O6sqNqtK7CLOqdydi54VNbORp49mb14KbWgG1QD3JFO9hJdZ+y4KutfdOQ==", + "dev": true, + "dependencies": { + "deep-equal": "^2.0.5" + } + }, + "node_modules/array-buffer-byte-length": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz", + "integrity": "sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.5", + "is-array-buffer": "^3.0.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array-includes": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.8.tgz", + "integrity": "sha512-itaWrbYbqpGXkGhZPGUulwnhVf5Hpy1xiCFsGqyIGglbBxmG5vSjxQen3/WGOjPpNEv1RtBLKxbmVXm8HpJStQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-object-atoms": "^1.0.0", + "get-intrinsic": "^1.2.4", + "is-string": "^1.0.7" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/array.prototype.findlast": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz", + "integrity": "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-shim-unscopables": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.findlastindex": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.5.tgz", + "integrity": "sha512-zfETvRFA8o7EiNn++N5f/kaCw221hrpGsDmcpndVupkPzEc1Wuf3VgC0qby1BbHs7f5DVYjgtEU2LLh5bqeGfQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-shim-unscopables": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.flat": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.2.tgz", + "integrity": 
"sha512-djYB+Zx2vLewY8RWlNCUdHjDXs2XOgm602S9E7P/UpHgfeHL00cRiIF+IN/G/aUJ7kGPb6yO/ErDI5V2s8iycA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "es-abstract": "^1.22.1", + "es-shim-unscopables": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.flatmap": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.2.tgz", + "integrity": "sha512-Ewyx0c9PmpcsByhSW4r+9zDU7sGjFc86qf/kKtuSCRdhfbk0SNLLkaT5qvcHnRGgc5NP/ly/y+qkXkqONX54CQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "es-abstract": "^1.22.1", + "es-shim-unscopables": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.toreversed": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/array.prototype.toreversed/-/array.prototype.toreversed-1.1.2.tgz", + "integrity": "sha512-wwDCoT4Ck4Cz7sLtgUmzR5UV3YF5mFHUlbChCzZBQZ+0m2cl/DH3tKgvphv1nKgFsJ48oCSg6p91q2Vm0I/ZMA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "es-abstract": "^1.22.1", + "es-shim-unscopables": "^1.0.0" + } + }, + "node_modules/array.prototype.tosorted": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.4.tgz", + "integrity": "sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.3", + "es-errors": "^1.3.0", + "es-shim-unscopables": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/arraybuffer.prototype.slice": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.3.tgz", + "integrity": "sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A==", + "dev": true, + "dependencies": { + "array-buffer-byte-length": "^1.0.1", + "call-bind": "^1.0.5", + "define-properties": "^1.2.1", + "es-abstract": "^1.22.3", + "es-errors": "^1.2.1", + "get-intrinsic": "^1.2.3", + "is-array-buffer": "^3.0.4", + "is-shared-array-buffer": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/ast-types-flow": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz", + "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", + "dev": true + }, + "node_modules/autoprefixer": { + "version": "10.4.19", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.19.tgz", + "integrity": "sha512-BaENR2+zBZ8xXhM4pUaKUxlVdxZ0EZhjvbopwnXmxRUfqDmwSpC2lAi/QXvx7NRdPCo1WKEcEF6mV64si1z4Ew==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/autoprefixer" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "browserslist": "^4.23.0", + "caniuse-lite": "^1.0.30001599", + "fraction.js": "^4.3.7", + "normalize-range": "^0.1.2", + 
"picocolors": "^1.0.0", + "postcss-value-parser": "^4.2.0" + }, + "bin": { + "autoprefixer": "bin/autoprefixer" + }, + "engines": { + "node": "^10 || ^12 || >=14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/available-typed-arrays": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", + "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", + "dev": true, + "dependencies": { + "possible-typed-array-names": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/axe-core": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.9.1.tgz", + "integrity": "sha512-QbUdXJVTpvUTHU7871ppZkdOLBeGUKBQWHkHrvN2V9IQWGMt61zf3B45BtzjxEJzYuj0JBjBZP/hmYS/R9pmAw==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/axobject-query": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-3.1.1.tgz", + "integrity": "sha512-goKlv8DZrK9hUh975fnHzhNIO4jUnFCfv/dszV5VwUGDFjI6vQ2VwoyjYjYNEbBE8AH87TduWP5uyDR1D+Iteg==", + "dev": true, + "dependencies": { + "deep-equal": "^2.0.5" + } + }, + "node_modules/bail": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", + "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.23.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.2.tgz", + "integrity": "sha512-qkqSyistMYdxAcw+CzbZwlBy8AGmS/eEWs+sEV5TnLRGDOL+C5M2EnH6tlZyg0YoAxGJAFKh61En9BR941GnHA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + 
"dependencies": { + "caniuse-lite": "^1.0.30001640", + "electron-to-chromium": "^1.4.820", + "node-releases": "^2.0.14", + "update-browserslist-db": "^1.1.0" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, + "node_modules/call-bind": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", + "dev": true, + "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/camelcase-css": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", + "integrity": "sha512-QOSvevhslijgYwRx6Rv7zKdMF8lbRmx+uQGx2+vDc+KI/eBnsy9kit5aj23AgGu3pa4t9AgwbnXWqS+iOY+2aA==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001642", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001642.tgz", + "integrity": "sha512-3XQ0DoRgLijXJErLSl+bLnJ+Et4KqV1PY6JJBGAFlsNsz31zeAIncyeZfLCabHK/jtSh+671RM9YMldxjUPZtA==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ] + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/character-entities": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", + "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + 
"readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/chokidar/node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/client-only": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", + "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==" + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/commander": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", + "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/cssesc": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", + "integrity": "sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==", + "dev": true, + "bin": { + "cssesc": "bin/cssesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/csstype": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==" + }, + "node_modules/damerau-levenshtein": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz", + "integrity": 
"sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA==", + "dev": true + }, + "node_modules/data-view-buffer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.1.tgz", + "integrity": "sha512-0lht7OugA5x3iJLOWFhWK/5ehONdprk0ISXqVFn/NFrDu+cuc8iADFrGQz5BnRK7LLU3JmkbXSxaqX+/mXYtUA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/data-view-byte-length": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.1.tgz", + "integrity": "sha512-4J7wRJD3ABAzr8wP+OcIcqq2dlUKp4DVflx++hs5h5ZKydWMI6/D/fAot+yh6g2tHh8fLFTvNOaVN357NvSrOQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/data-view-byte-offset": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.0.tgz", + "integrity": "sha512-t/Ygsytq+R995EJ5PZlD4Cu56sWa8InXySaViRzw9apusqsOO2bQP+SbYzAhR0pFKoB+43lYy8rWban9JSuXnA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/debug": { + "version": "4.3.5", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz", + "integrity": "sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg==", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decode-named-character-reference": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz", + "integrity": "sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==", + "dependencies": { + "character-entities": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/deep-equal": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-2.2.3.tgz", + "integrity": "sha512-ZIwpnevOurS8bpT4192sqAowWM76JDKSHYzMLty3BZGSswgq6pBaH3DhCSW5xVAZICZyKdOBPjwww5wfgT/6PA==", + "dev": true, + "dependencies": { + "array-buffer-byte-length": "^1.0.0", + "call-bind": "^1.0.5", + "es-get-iterator": "^1.1.3", + "get-intrinsic": "^1.2.2", + "is-arguments": "^1.1.1", + "is-array-buffer": "^3.0.2", + "is-date-object": "^1.0.5", + "is-regex": "^1.1.4", + "is-shared-array-buffer": "^1.0.2", + "isarray": "^2.0.5", + "object-is": "^1.1.5", + "object-keys": "^1.1.1", + "object.assign": "^4.1.4", + "regexp.prototype.flags": "^1.5.1", + "side-channel": "^1.0.4", + "which-boxed-primitive": "^1.0.2", + "which-collection": "^1.0.1", + "which-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": 
"sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true + }, + "node_modules/define-data-property": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", + "dev": true, + "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/define-properties": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", + "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", + "dev": true, + "dependencies": { + "define-data-property": "^1.0.1", + "has-property-descriptors": "^1.0.0", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/didyoumean": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", + "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==", + "dev": true + }, + "node_modules/diff": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", + "dev": true, + "dependencies": { + "path-type": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/dlv": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", + "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==", + "dev": true + }, + "node_modules/doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "dev": true + }, + "node_modules/electron-to-chromium": { + "version": "1.4.832", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.832.tgz", + "integrity": "sha512-cTen3SB0H2SGU7x467NRe1eVcQgcuS6jckKfWJHia2eo0cHIGOqHoAxevIYZD4eRHcWjkvFzo93bi3vJ9W+1lA==", + "dev": true + }, + "node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": 
"https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true + }, + "node_modules/enhanced-resolve": { + "version": "5.17.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.0.tgz", + "integrity": "sha512-dwDPwZL0dmye8Txp2gzFmA6sxALaSvdRDjPH0viLcKrtlOL3tw62nWWweVD1SdILDTJrbrL6tdWVN58Wo6U3eA==", + "dev": true, + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.2.0" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/es-abstract": { + "version": "1.23.3", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.3.tgz", + "integrity": "sha512-e+HfNH61Bj1X9/jLc5v1owaLYuHdeHHSQlkhCBiTK8rBvKaULl/beGMxwrMXjpYrv4pz22BlY570vVePA2ho4A==", + "dev": true, + "dependencies": { + "array-buffer-byte-length": "^1.0.1", + "arraybuffer.prototype.slice": "^1.0.3", + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", + "data-view-buffer": "^1.0.1", + "data-view-byte-length": "^1.0.1", + "data-view-byte-offset": "^1.0.0", + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-set-tostringtag": "^2.0.3", + "es-to-primitive": "^1.2.1", + "function.prototype.name": "^1.1.6", + "get-intrinsic": "^1.2.4", + "get-symbol-description": "^1.0.2", + "globalthis": "^1.0.3", + "gopd": "^1.0.1", + "has-property-descriptors": "^1.0.2", + "has-proto": "^1.0.3", + "has-symbols": "^1.0.3", + "hasown": "^2.0.2", + "internal-slot": "^1.0.7", + "is-array-buffer": "^3.0.4", + "is-callable": "^1.2.7", + "is-data-view": "^1.0.1", + "is-negative-zero": "^2.0.3", + "is-regex": "^1.1.4", + "is-shared-array-buffer": "^1.0.3", + "is-string": "^1.0.7", + "is-typed-array": "^1.1.13", + "is-weakref": "^1.0.2", + "object-inspect": "^1.13.1", + "object-keys": "^1.1.1", + "object.assign": "^4.1.5", + "regexp.prototype.flags": "^1.5.2", + "safe-array-concat": "^1.1.2", + "safe-regex-test": "^1.0.3", + "string.prototype.trim": "^1.2.9", + "string.prototype.trimend": "^1.0.8", + "string.prototype.trimstart": "^1.0.8", + "typed-array-buffer": "^1.0.2", + "typed-array-byte-length": "^1.0.1", + "typed-array-byte-offset": "^1.0.2", + "typed-array-length": "^1.0.6", + "unbox-primitive": "^1.0.2", + "which-typed-array": "^1.1.15" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dev": true, + "dependencies": { + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-get-iterator": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/es-get-iterator/-/es-get-iterator-1.1.3.tgz", + "integrity": "sha512-sPZmqHBe6JIiTfN5q2pEi//TwxmAFHwj/XEuYjTuse78i8KxaqMTTzxPoFKuzRpDpTJ+0NAbpfenkmH2rePtuw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "get-intrinsic": "^1.1.3", + "has-symbols": "^1.0.3", + "is-arguments": 
"^1.1.1", + "is-map": "^2.0.2", + "is-set": "^2.0.2", + "is-string": "^1.0.7", + "isarray": "^2.0.5", + "stop-iteration-iterator": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/es-iterator-helpers": { + "version": "1.0.19", + "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.19.tgz", + "integrity": "sha512-zoMwbCcH5hwUkKJkT8kDIBZSz9I6mVG//+lDCinLCGov4+r7NIy0ld8o03M0cJxl2spVf6ESYVS6/gpIfq1FFw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.3", + "es-errors": "^1.3.0", + "es-set-tostringtag": "^2.0.3", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "globalthis": "^1.0.3", + "has-property-descriptors": "^1.0.2", + "has-proto": "^1.0.3", + "has-symbols": "^1.0.3", + "internal-slot": "^1.0.7", + "iterator.prototype": "^1.1.2", + "safe-array-concat": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.0.0.tgz", + "integrity": "sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw==", + "dev": true, + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz", + "integrity": "sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ==", + "dev": true, + "dependencies": { + "get-intrinsic": "^1.2.4", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.1" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-shim-unscopables": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/es-shim-unscopables/-/es-shim-unscopables-1.0.2.tgz", + "integrity": "sha512-J3yBRXCzDu4ULnQwxyToo/OjdMx6akgVC7K6few0a7F/0wLtmKKN7I73AH5T2836UuXRqN7Qg+IIUw/+YJksRw==", + "dev": true, + "dependencies": { + "hasown": "^2.0.0" + } + }, + "node_modules/es-to-primitive": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz", + "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==", + "dev": true, + "dependencies": { + "is-callable": "^1.1.4", + "is-date-object": "^1.0.1", + "is-symbol": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/escalade": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eslint": { + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz", + "integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==", + 
"dev": true, + "dependencies": { + "@eslint-community/eslint-utils": "^4.2.0", + "@eslint-community/regexpp": "^4.6.1", + "@eslint/eslintrc": "^2.1.4", + "@eslint/js": "8.57.0", + "@humanwhocodes/config-array": "^0.11.14", + "@humanwhocodes/module-importer": "^1.0.1", + "@nodelib/fs.walk": "^1.2.8", + "@ungap/structured-clone": "^1.2.0", + "ajv": "^6.12.4", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.3.2", + "doctrine": "^3.0.0", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^7.2.2", + "eslint-visitor-keys": "^3.4.3", + "espree": "^9.6.1", + "esquery": "^1.4.2", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^6.0.1", + "find-up": "^5.0.0", + "glob-parent": "^6.0.2", + "globals": "^13.19.0", + "graphemer": "^1.4.0", + "ignore": "^5.2.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "is-path-inside": "^3.0.3", + "js-yaml": "^4.1.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.2", + "natural-compare": "^1.4.0", + "optionator": "^0.9.3", + "strip-ansi": "^6.0.1", + "text-table": "^0.2.0" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-config-next": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-14.2.5.tgz", + "integrity": "sha512-zogs9zlOiZ7ka+wgUnmcM0KBEDjo4Jis7kxN1jvC0N4wynQ2MIx/KBkg4mVF63J5EK4W0QMCn7xO3vNisjaAoA==", + "dev": true, + "dependencies": { + "@next/eslint-plugin-next": "14.2.5", + "@rushstack/eslint-patch": "^1.3.3", + "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || 7.0.0 - 7.2.0", + "eslint-import-resolver-node": "^0.3.6", + "eslint-import-resolver-typescript": "^3.5.2", + "eslint-plugin-import": "^2.28.1", + "eslint-plugin-jsx-a11y": "^6.7.1", + "eslint-plugin-react": "^7.33.2", + "eslint-plugin-react-hooks": "^4.5.0 || 5.0.0-canary-7118f5dd7-20230705" + }, + "peerDependencies": { + "eslint": "^7.23.0 || ^8.0.0", + "typescript": ">=3.3.1" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/eslint-import-resolver-node": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz", + "integrity": "sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g==", + "dev": true, + "dependencies": { + "debug": "^3.2.7", + "is-core-module": "^2.13.0", + "resolve": "^1.22.4" + } + }, + "node_modules/eslint-import-resolver-node/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/eslint-import-resolver-typescript": { + "version": "3.6.1", + "resolved": "https://registry.npmjs.org/eslint-import-resolver-typescript/-/eslint-import-resolver-typescript-3.6.1.tgz", + "integrity": "sha512-xgdptdoi5W3niYeuQxKmzVDTATvLYqhpwmykwsh7f6HIOStGWEIL9iqZgQDF9u9OEzrRwR8no5q2VT+bjAujTg==", + "dev": true, + "dependencies": { + "debug": "^4.3.4", + "enhanced-resolve": "^5.12.0", + "eslint-module-utils": "^2.7.4", + "fast-glob": "^3.3.1", + "get-tsconfig": "^4.5.0", + "is-core-module": "^2.11.0", + "is-glob": "^4.0.3" + }, + "engines": { + "node": 
"^14.18.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/unts/projects/eslint-import-resolver-ts" + }, + "peerDependencies": { + "eslint": "*", + "eslint-plugin-import": "*" + } + }, + "node_modules/eslint-module-utils": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.8.1.tgz", + "integrity": "sha512-rXDXR3h7cs7dy9RNpUlQf80nX31XWJEyGq1tRMo+6GsO5VmTe4UTwtmonAD4ZkAsrfMVDA2wlGJ3790Ys+D49Q==", + "dev": true, + "dependencies": { + "debug": "^3.2.7" + }, + "engines": { + "node": ">=4" + }, + "peerDependenciesMeta": { + "eslint": { + "optional": true + } + } + }, + "node_modules/eslint-module-utils/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/eslint-plugin-import": { + "version": "2.29.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.29.1.tgz", + "integrity": "sha512-BbPC0cuExzhiMo4Ff1BTVwHpjjv28C5R+btTOGaCRC7UEz801up0JadwkeSk5Ued6TG34uaczuVuH6qyy5YUxw==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.7", + "array.prototype.findlastindex": "^1.2.3", + "array.prototype.flat": "^1.3.2", + "array.prototype.flatmap": "^1.3.2", + "debug": "^3.2.7", + "doctrine": "^2.1.0", + "eslint-import-resolver-node": "^0.3.9", + "eslint-module-utils": "^2.8.0", + "hasown": "^2.0.0", + "is-core-module": "^2.13.1", + "is-glob": "^4.0.3", + "minimatch": "^3.1.2", + "object.fromentries": "^2.0.7", + "object.groupby": "^1.0.1", + "object.values": "^1.1.7", + "semver": "^6.3.1", + "tsconfig-paths": "^3.15.0" + }, + "engines": { + "node": ">=4" + }, + "peerDependencies": { + "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0 || ^8" + } + }, + "node_modules/eslint-plugin-import/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/eslint-plugin-import/node_modules/doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/eslint-plugin-import/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/eslint-plugin-jsx-a11y": { + "version": "6.9.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.9.0.tgz", + "integrity": "sha512-nOFOCaJG2pYqORjK19lqPqxMO/JpvdCZdPtNdxY3kvom3jTvkAbOvQvD8wuD0G8BYR0IGAGYDlzqWJOh/ybn2g==", + "dev": true, + "dependencies": { + "aria-query": "~5.1.3", + "array-includes": "^3.1.8", + "array.prototype.flatmap": "^1.3.2", + "ast-types-flow": "^0.0.8", + "axe-core": "^4.9.1", + "axobject-query": "~3.1.1", + "damerau-levenshtein": "^1.0.8", + "emoji-regex": "^9.2.2", + "es-iterator-helpers": 
"^1.0.19", + "hasown": "^2.0.2", + "jsx-ast-utils": "^3.3.5", + "language-tags": "^1.0.9", + "minimatch": "^3.1.2", + "object.fromentries": "^2.0.8", + "safe-regex-test": "^1.0.3", + "string.prototype.includes": "^2.0.0" + }, + "engines": { + "node": ">=4.0" + }, + "peerDependencies": { + "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8" + } + }, + "node_modules/eslint-plugin-react": { + "version": "7.34.4", + "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.34.4.tgz", + "integrity": "sha512-Np+jo9bUwJNxCsT12pXtrGhJgT3T44T1sHhn1Ssr42XFn8TES0267wPGo5nNrMHi8qkyimDAX2BUmkf9pSaVzA==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.8", + "array.prototype.findlast": "^1.2.5", + "array.prototype.flatmap": "^1.3.2", + "array.prototype.toreversed": "^1.1.2", + "array.prototype.tosorted": "^1.1.4", + "doctrine": "^2.1.0", + "es-iterator-helpers": "^1.0.19", + "estraverse": "^5.3.0", + "hasown": "^2.0.2", + "jsx-ast-utils": "^2.4.1 || ^3.0.0", + "minimatch": "^3.1.2", + "object.entries": "^1.1.8", + "object.fromentries": "^2.0.8", + "object.values": "^1.2.0", + "prop-types": "^15.8.1", + "resolve": "^2.0.0-next.5", + "semver": "^6.3.1", + "string.prototype.matchall": "^4.0.11", + "string.prototype.repeat": "^1.0.0" + }, + "engines": { + "node": ">=4" + }, + "peerDependencies": { + "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8" + } + }, + "node_modules/eslint-plugin-react-hooks": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.6.2.tgz", + "integrity": "sha512-QzliNJq4GinDBcD8gPB5v0wh6g8q3SUi6EFF0x8N/BL9PoVs0atuGc47ozMRyOWAKdwaZ5OnbOEa3WR+dSGKuQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0" + } + }, + "node_modules/eslint-plugin-react/node_modules/doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/eslint-plugin-react/node_modules/resolve": { + "version": "2.0.0-next.5", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.5.tgz", + "integrity": "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==", + "dev": true, + "dependencies": { + "is-core-module": "^2.13.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/eslint-plugin-react/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/eslint-scope": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz", + "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==", + "dev": true, + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } 
+ }, + "node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", + "dev": true, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/espree": { + "version": "9.6.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz", + "integrity": "sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==", + "dev": true, + "dependencies": { + "acorn": "^8.9.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^3.4.1" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/esquery": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", + "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", + "dev": true, + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "node_modules/fast-glob": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.4" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fast-glob/node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + 
}, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true + }, + "node_modules/fastq": { + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", + "dev": true, + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "dependencies": { + "flat-cache": "^3.0.4" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat-cache": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.2.0.tgz", + "integrity": "sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw==", + "dev": true, + "dependencies": { + "flatted": "^3.2.9", + "keyv": "^4.5.3", + "rimraf": "^3.0.2" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/flatted": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz", + "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", + "dev": true + }, + "node_modules/for-each": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz", + "integrity": "sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==", + "dev": true, + "dependencies": { + "is-callable": "^1.1.3" + } + }, + "node_modules/foreground-child": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.2.1.tgz", + "integrity": "sha512-PXUUyLqrR2XCWICfv6ukppP96sdFwWbNEnfEMt7jNsISjMsvaLNinAHNDYyvkyU+SZG2BTSbT5NjG+vZslfGTA==", + "dev": true, + "dependencies": { + "cross-spawn": "^7.0.0", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/fraction.js": { + "version": 
"4.3.7", + "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", + "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==", + "dev": true, + "engines": { + "node": "*" + }, + "funding": { + "type": "patreon", + "url": "https://github.com/sponsors/rawify" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/function.prototype.name": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.6.tgz", + "integrity": "sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.2.0", + "es-abstract": "^1.22.1", + "functions-have-names": "^1.2.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/functions-have-names": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz", + "integrity": "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", + "dev": true, + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "has-proto": "^1.0.1", + "has-symbols": "^1.0.3", + "hasown": "^2.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-symbol-description": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.0.2.tgz", + "integrity": "sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.5", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-tsconfig": { + "version": "4.7.6", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.6.tgz", + "integrity": "sha512-ZAqrLlu18NbDdRaHq+AKXzAmqIUPswPWKUchfytdAjiRFnCe5ojG2bstg6mRiZabkKfCoL/e98pbBELIV/YCeA==", + 
"dev": true, + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/glob": { + "version": "10.3.10", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz", + "integrity": "sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==", + "dev": true, + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^2.3.5", + "minimatch": "^9.0.1", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0", + "path-scurry": "^1.10.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/glob/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/glob/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/globals": { + "version": "13.24.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz", + "integrity": "sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==", + "dev": true, + "dependencies": { + "type-fest": "^0.20.2" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globalthis": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", + "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", + "dev": true, + "dependencies": { + "define-properties": "^1.2.1", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/globby": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz", + "integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==", + "dev": true, + "dependencies": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.2.9", + "ignore": "^5.2.0", + "merge2": "^1.4.1", + "slash": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/gopd": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", + "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", + "dev": true, + 
"dependencies": { + "get-intrinsic": "^1.1.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==" + }, + "node_modules/graphemer": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", + "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", + "dev": true + }, + "node_modules/has-bigints": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz", + "integrity": "sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/has-property-descriptors": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", + "dev": true, + "dependencies": { + "es-define-property": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-proto": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", + "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz", + "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "dev": true, + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/hast-util-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-2.0.1.tgz", + "integrity": "sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==", + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/unified" + } + }, + "node_modules/ignore": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.1.tgz", + "integrity": "sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "dev": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "node_modules/inline-style-parser": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.1.1.tgz", + "integrity": "sha512-7NXolsK4CAS5+xvdj5OMMbI962hU/wvwoxk+LWR9Ek9bVtyuuYScDN6eS0rUm6TxApFpw7CX1o4uJzcd4AyD3Q==" + }, + "node_modules/internal-slot": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.7.tgz", + "integrity": "sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g==", + "dev": true, + "dependencies": { + "es-errors": "^1.3.0", + "hasown": "^2.0.0", + "side-channel": "^1.0.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/is-arguments": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.1.tgz", + "integrity": "sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-array-buffer": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.4.tgz", + "integrity": "sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "get-intrinsic": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-async-function": { + 
"version": "2.0.0", + "resolved": "https://registry.npmjs.org/is-async-function/-/is-async-function-2.0.0.tgz", + "integrity": "sha512-Y1JXKrfykRJGdlDwdKlLpLyMIiWqWvuSd17TvZk68PLAOGOoF4Xyav1z0Xhoi+gCYjZVeC5SI+hYFOfvXmGRCA==", + "dev": true, + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-bigint": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.4.tgz", + "integrity": "sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg==", + "dev": true, + "dependencies": { + "has-bigints": "^1.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-boolean-object": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.2.tgz", + "integrity": "sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-buffer": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", + "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "engines": { + "node": ">=4" + } + }, + "node_modules/is-callable": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", + "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-core-module": { + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.15.0.tgz", + "integrity": "sha512-Dd+Lb2/zvk9SKy1TGCt1wFJFo/MWBPMX5x7KcvLajWTGuomczdQX61PvY5yK6SVACwpoexWo81IfFyoKY2QnTA==", + "dev": true, + "dependencies": { + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-data-view": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.1.tgz", + "integrity": "sha512-AHkaJrsUVW6wq6JS8y3JnM/GJF/9cf+k20+iDzlSaJrinEo5+7vRiteOSwBhHRiAyQATN1AmY4hwzxJKPmYf+w==", + "dev": true, + "dependencies": { + "is-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-date-object": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.5.tgz", + "integrity": 
"sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ==", + "dev": true, + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-finalizationregistry": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-finalizationregistry/-/is-finalizationregistry-1.0.2.tgz", + "integrity": "sha512-0by5vtUJs8iFQb5TYUHHPudOR+qXYIMKtiUzvLIZITZUjknFmziyBJuLhVRc+Ds0dREFlskDNJKYIdIzu/9pfw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-generator-function": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz", + "integrity": "sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==", + "dev": true, + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-map": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", + "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-negative-zero": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.3.tgz", + "integrity": "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-number-object": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.7.tgz", + "integrity": "sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ==", + "dev": true, + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 
0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-path-inside": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz", + "integrity": "sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-plain-obj": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", + "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-regex": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz", + "integrity": "sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-set": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz", + "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-shared-array-buffer": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.3.tgz", + "integrity": "sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-string": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.7.tgz", + "integrity": "sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg==", + "dev": true, + "dependencies": { + "has-tostringtag": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-symbol": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz", + "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==", + "dev": true, + "dependencies": { + "has-symbols": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-typed-array": { + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.13.tgz", + "integrity": "sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==", + "dev": true, + "dependencies": { + "which-typed-array": "^1.1.14" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-weakmap": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", + "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==", + "dev": true, + 
"engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-weakref": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-weakref/-/is-weakref-1.0.2.tgz", + "integrity": "sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-weakset": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.3.tgz", + "integrity": "sha512-LvIm3/KWzS9oRFHugab7d+M/GcBXuXX5xZkzPmN+NxihdQlZUQ4dWuSV1xR/sq6upL1TJEDrfBgRepHFdBtSNQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/isarray": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz", + "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", + "dev": true + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "node_modules/iterator.prototype": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/iterator.prototype/-/iterator.prototype-1.1.2.tgz", + "integrity": "sha512-DR33HMMr8EzwuRL8Y9D3u2BMj8+RqSE850jfGu59kS7tbmPLzGkZmVSfyCFSDxuZiEY6Rzt3T2NA/qU+NwVj1w==", + "dev": true, + "dependencies": { + "define-properties": "^1.2.1", + "get-intrinsic": "^1.2.1", + "has-symbols": "^1.0.3", + "reflect.getprototypeof": "^1.0.4", + "set-function-name": "^2.0.1" + } + }, + "node_modules/jackspeak": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-2.3.6.tgz", + "integrity": "sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==", + "dev": true, + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, + "node_modules/jiti": { + "version": "1.21.6", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.6.tgz", + "integrity": "sha512-2yTgeWTWzMWkHu6Jp9NKgePDaYHbntiwvYuuJLbbN9vl7DC9DvXKOB2BC3ZZ92D3cvV/aflH0osDfwpHepQ53w==", + "dev": true, + "bin": { + "jiti": "bin/jiti.js" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dev": true, + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true + }, + 
"node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true + }, + "node_modules/json5": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", + "dev": true, + "dependencies": { + "minimist": "^1.2.0" + }, + "bin": { + "json5": "lib/cli.js" + } + }, + "node_modules/jsx-ast-utils": { + "version": "3.3.5", + "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz", + "integrity": "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.6", + "array.prototype.flat": "^1.3.1", + "object.assign": "^4.1.4", + "object.values": "^1.1.6" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, + "dependencies": { + "json-buffer": "3.0.1" + } + }, + "node_modules/kleur": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz", + "integrity": "sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==", + "engines": { + "node": ">=6" + } + }, + "node_modules/language-subtag-registry": { + "version": "0.3.23", + "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.23.tgz", + "integrity": "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ==", + "dev": true + }, + "node_modules/language-tags": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.9.tgz", + "integrity": "sha512-MbjN408fEndfiQXbFQ1vnd+1NoLDsnQW41410oQBXiyXDMYH5z505juWa4KUE1LqxRC7DgOgZDbKLxHIwm27hA==", + "dev": true, + "dependencies": { + "language-subtag-registry": "^0.3.20" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/lilconfig": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", + "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": 
"sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "dependencies": { + "p-locate": "^5.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true + }, + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, + "node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true + }, + "node_modules/mdast-util-definitions": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz", + "integrity": "sha512-8SVPMuHqlPME/z3gqVwWY4zVXn8lqKv/pAhC57FuJ40ImXyBpmO5ukh98zB2v7Blql2FiHjHv9LVztSIqjY+MA==", + "dependencies": { + "@types/mdast": "^3.0.0", + "@types/unist": "^2.0.0", + "unist-util-visit": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-from-markdown": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-1.3.1.tgz", + "integrity": "sha512-4xTO/M8c82qBcnQc1tgpNtubGUW/Y1tBQ1B0i5CtSoelOLKFYlElIr3bvgREYYO5iRqbMY1YuqZng0GVOI8Qww==", + "dependencies": { + "@types/mdast": "^3.0.0", + "@types/unist": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "mdast-util-to-string": "^3.1.0", + "micromark": "^3.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-decode-string": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "unist-util-stringify-position": "^3.0.0", + "uvu": "^0.5.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-hast": { + "version": "12.3.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-12.3.0.tgz", + "integrity": "sha512-pits93r8PhnIoU4Vy9bjW39M2jJ6/tdHyja9rrot9uujkN7UTU9SDnE6WNJz/IGyQk3XHX6yNNtrBH6cQzm8Hw==", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-definitions": "^5.0.0", + "micromark-util-sanitize-uri": "^1.1.0", + "trim-lines": "^3.0.0", + "unist-util-generated": "^2.0.0", + "unist-util-position": "^4.0.0", + "unist-util-visit": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-string": { + "version": "3.2.0", + "resolved": 
"https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-3.2.0.tgz", + "integrity": "sha512-V4Zn/ncyN1QNSqSBxTrMOLpjr+IKdHl2v3KVLoWmDPscP4r9GcCi71gjgvUV1SFSKh92AjAG4peFuBl2/YgCJg==", + "dependencies": { + "@types/mdast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromark": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-3.2.0.tgz", + "integrity": "sha512-uD66tJj54JLYq0De10AhWycZWGQNUvDI55xPgk2sQM5kn1JYlhbCMTtEeT27+vAhW2FBQxLlOmS3pmA7/2z4aA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "@types/debug": "^4.0.0", + "debug": "^4.0.0", + "decode-named-character-reference": "^1.0.0", + "micromark-core-commonmark": "^1.0.1", + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-chunked": "^1.0.0", + "micromark-util-combine-extensions": "^1.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-encode": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-resolve-all": "^1.0.0", + "micromark-util-sanitize-uri": "^1.0.0", + "micromark-util-subtokenize": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.1", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-core-commonmark": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-1.1.0.tgz", + "integrity": "sha512-BgHO1aRbolh2hcrzL2d1La37V0Aoz73ymF8rAcKnohLy93titmv62E0gP8Hrx9PKcKrqCZ1BbLGbP3bEhoXYlw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-factory-destination": "^1.0.0", + "micromark-factory-label": "^1.0.0", + "micromark-factory-space": "^1.0.0", + "micromark-factory-title": "^1.0.0", + "micromark-factory-whitespace": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-chunked": "^1.0.0", + "micromark-util-classify-character": "^1.0.0", + "micromark-util-html-tag-name": "^1.0.0", + "micromark-util-normalize-identifier": "^1.0.0", + "micromark-util-resolve-all": "^1.0.0", + "micromark-util-subtokenize": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.1", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-factory-destination": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-1.1.0.tgz", + "integrity": "sha512-XaNDROBgx9SgSChd69pjiGKbV+nfHGDPVYFs5dOoDd7ZnMAE+Cuu91BCpsY8RT2NP9vo/B8pds2VQNCLiu0zhg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" 
+ } + }, + "node_modules/micromark-factory-label": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-1.1.0.tgz", + "integrity": "sha512-OLtyez4vZo/1NjxGhcpDSbHQ+m0IIGnT8BoPamh+7jVlzLJBH98zzuCoUeMxvM6WsNeh8wx8cKvqLiPHEACn0w==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-factory-space": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-1.1.0.tgz", + "integrity": "sha512-cRzEj7c0OL4Mw2v6nwzttyOZe8XY/Z8G0rzmWQZTBi/jjwyw/U4uqKtUORXQrR5bAZZnbTI/feRV/R7hc4jQYQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-factory-title": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-1.1.0.tgz", + "integrity": "sha512-J7n9R3vMmgjDOCY8NPw55jiyaQnH5kBdV2/UXCtZIpnHH3P6nHUKaH7XXEYuWwx/xUJcawa8plLBEjMPU24HzQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-factory-whitespace": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-1.1.0.tgz", + "integrity": "sha512-v2WlmiymVSp5oMg+1Q0N1Lxmt6pMhIHD457whWM7/GUlEks1hI9xj5w3zbc4uuMKXGisksZk8DzP2UyGbGqNsQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-factory-space": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-character": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-1.2.0.tgz", + "integrity": "sha512-lXraTwcX3yH/vMDaFWCQJP1uIszLVebzUa3ZHdrgxr7KEU/9mL4mVgCpGbyhvNLNlauROiNUq7WN5u7ndbY6xg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-chunked": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-1.1.0.tgz", + "integrity": "sha512-Ye01HXpkZPNcV6FiyoW2fGZDUw4Yc7vT0E9Sad83+bEDiCJ1uXu0S3mr8WLpsz3HaG3x2q0HM6CTuPdcZcluFQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": 
"https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-classify-character": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-1.1.0.tgz", + "integrity": "sha512-SL0wLxtKSnklKSUplok1WQFoGhUdWYKggKUiqhX+Swala+BtptGCu5iPRc+xvzJ4PXE/hwM3FNXsfEVgoZsWbw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-combine-extensions": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-1.1.0.tgz", + "integrity": "sha512-Q20sp4mfNf9yEqDL50WwuWZHUrCO4fEyeDCnMGmG5Pr0Cz15Uo7KBs6jq+dq0EgX4DPwwrh9m0X+zPV1ypFvUA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-chunked": "^1.0.0", + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-decode-numeric-character-reference": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-1.1.0.tgz", + "integrity": "sha512-m9V0ExGv0jB1OT21mrWcuf4QhP46pH1KkfWy9ZEezqHKAxkj4mPCy3nIH1rkbdMlChLHX531eOrymlwyZIf2iw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-decode-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-1.1.0.tgz", + "integrity": "sha512-YphLGCK8gM1tG1bd54azwyrQRjCFcmgj2S2GoJDNnh4vYtnL38JS8M4gpxzOPNyHdNEpheyWXCTnnTDY3N+NVQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-util-character": "^1.0.0", + "micromark-util-decode-numeric-character-reference": "^1.0.0", + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-1.1.0.tgz", + "integrity": "sha512-EuEzTWSTAj9PA5GOAs992GzNh2dGQO52UvAbtSOMvXTxv3Criqb6IOzJUBCmEqrrXSblJIJBbFFv6zPxpreiJw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromark-util-html-tag-name": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-1.2.0.tgz", + "integrity": "sha512-VTQzcuQgFUD7yYztuQFKXT49KghjtETQ+Wv/zUjGSGBioZnkA4P1XXZPT1FHeJA6RwRXSF47yvJ1tsJdoxwO+Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, 
+ { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromark-util-normalize-identifier": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-1.1.0.tgz", + "integrity": "sha512-N+w5vhqrBihhjdpM8+5Xsxy71QWqGn7HYNUvch71iV2PM7+E3uWGox1Qp90loa1ephtCxG2ftRV/Conitc6P2Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-resolve-all": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-1.1.0.tgz", + "integrity": "sha512-b/G6BTMSg+bX+xVCshPTPyAu2tmA0E4X98NSR7eIbeC6ycCqCeE7wjfDIgzEbkzdEVJXRtOG4FbEm/uGbCRouA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-types": "^1.0.0" + } + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-1.2.0.tgz", + "integrity": "sha512-QO4GXv0XZfWey4pYFndLUKEAktKkG5kZTdUNaTAkzbuJxn2tNBOr+QtxR2XpWaMhbImT2dPzyLrPXLlPhph34A==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^1.0.0", + "micromark-util-encode": "^1.0.0", + "micromark-util-symbol": "^1.0.0" + } + }, + "node_modules/micromark-util-subtokenize": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-1.1.0.tgz", + "integrity": "sha512-kUQHyzRoxvZO2PuLzMt2P/dwVsTiivCK8icYTeR+3WgbuPqfHgPPy7nFKbeqRivBvn/3N3GBiNC+JRTMSxEC7A==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-chunked": "^1.0.0", + "micromark-util-symbol": "^1.0.0", + "micromark-util-types": "^1.0.0", + "uvu": "^0.5.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-1.1.0.tgz", + "integrity": "sha512-uEjpEYY6KMs1g7QfJ2eX1SQEV+ZT4rUD3UcF6l57acZvLNK7PBZL+ty82Z1qhK1/yXIY4bdx04FKMgR0g4IAag==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromark-util-types": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-1.1.0.tgz", + "integrity": "sha512-ukRBgie8TIAcacscVHSiddHjO4k/q3pnedmzMQ4iwDcK0FtFCohKOlFbaOL/mPgfnPsL3C1ZyxJa4sbWrBl3jg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromatch": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", + 
"integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", + "dev": true, + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/mri": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/mri/-/mri-1.2.0.tgz", + "integrity": "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA==", + "engines": { + "node": ">=4" + } + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + }, + "node_modules/mz": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", + "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "dev": true, + "dependencies": { + "any-promise": "^1.0.0", + "object-assign": "^4.0.1", + "thenify-all": "^1.0.0" + } + }, + "node_modules/nanoid": { + "version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, + "node_modules/next": { + "version": "14.2.5", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.5.tgz", + "integrity": "sha512-0f8aRfBVL+mpzfBjYfQuLWh2WyAwtJXCRfkPF4UJ5qd2YwrHczsrSzXU4tRMV0OAxR8ZJZWPFn6uhSC56UTsLA==", + "dependencies": { + "@next/env": "14.2.5", + "@swc/helpers": "0.5.5", + "busboy": "1.6.0", + "caniuse-lite": "^1.0.30001579", + "graceful-fs": "^4.2.11", + "postcss": "8.4.31", + "styled-jsx": "5.1.1" + }, + "bin": { + "next": "dist/bin/next" + }, + "engines": { + "node": ">=18.17.0" + }, + "optionalDependencies": { + "@next/swc-darwin-arm64": "14.2.5", + "@next/swc-darwin-x64": "14.2.5", + "@next/swc-linux-arm64-gnu": "14.2.5", + "@next/swc-linux-arm64-musl": "14.2.5", + "@next/swc-linux-x64-gnu": "14.2.5", + 
"@next/swc-linux-x64-musl": "14.2.5", + "@next/swc-win32-arm64-msvc": "14.2.5", + "@next/swc-win32-ia32-msvc": "14.2.5", + "@next/swc-win32-x64-msvc": "14.2.5" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.1.0", + "@playwright/test": "^1.41.2", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "sass": "^1.3.0" + }, + "peerDependenciesMeta": { + "@opentelemetry/api": { + "optional": true + }, + "@playwright/test": { + "optional": true + }, + "sass": { + "optional": true + } + } + }, + "node_modules/next/node_modules/postcss": { + "version": "8.4.31", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", + "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.6", + "picocolors": "^1.0.0", + "source-map-js": "^1.0.2" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/node-releases": { + "version": "2.0.17", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.17.tgz", + "integrity": "sha512-Ww6ZlOiEQfPfXM45v17oabk77Z7mg5bOt7AjDyzy7RjK9OrLrLC8dyZQoAPEOtFX9SaNf1Tdvr5gRJWdTJj7GA==", + "dev": true + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/normalize-range": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", + "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-hash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", + "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/object-inspect": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", + "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object-is": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/object-is/-/object-is-1.1.6.tgz", + "integrity": "sha512-F8cZ+KfGlSGi09lJT7/Nd6KJZ9ygtvYC0/UYYLI9nmQKLMnydpB9yvbv9K1uSkEu7FU9vYPmVwLg328tX+ot3Q==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object-keys": { + 
"version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.assign": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.5.tgz", + "integrity": "sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.5", + "define-properties": "^1.2.1", + "has-symbols": "^1.0.3", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object.entries": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.8.tgz", + "integrity": "sha512-cmopxi8VwRIAw/fkijJohSfpef5PdN0pMQJN6VC/ZKvn0LIknWD8KtgY6KlQdEc4tIjcQ3HxSMmnvtzIscdaYQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.fromentries": { + "version": "2.0.8", + "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.8.tgz", + "integrity": "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object.groupby": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.3.tgz", + "integrity": "sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.values": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.2.0.tgz", + "integrity": "sha512-yBYjY9QX2hnRmZHAjG/f13MzmBzxzYgQhFrke06TTyKY5zSTEqkOeukBzIdVA3j3ulu8Qa3MbVFShV7T2RmGtQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/optionator": { + "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "dependencies": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", 
+ "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true + }, + "node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "dev": true, + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/picocolors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz", + "integrity": "sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "engines": { + 
"node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pify": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", + "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/pirates": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", + "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/possible-typed-array-names": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz", + "integrity": "sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/postcss": { + "version": "8.4.39", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.39.tgz", + "integrity": "sha512-0vzE+lAiG7hZl1/9I8yzKLx3aR9Xbof3fBHKunvMfOCYAtMhrsnccJY2iTURb9EZd5+pLuiNV9/c/GZJOHsgIw==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.7", + "picocolors": "^1.0.1", + "source-map-js": "^1.2.0" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/postcss-import": { + "version": "15.1.0", + "resolved": "https://registry.npmjs.org/postcss-import/-/postcss-import-15.1.0.tgz", + "integrity": "sha512-hpr+J05B2FVYUAXHeK1YyI267J/dDDhMU6B6civm8hSY1jYJnBXxzKDKDswzJmtLHryrjhnDjqqp/49t8FALew==", + "dev": true, + "dependencies": { + "postcss-value-parser": "^4.0.0", + "read-cache": "^1.0.0", + "resolve": "^1.1.7" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "postcss": "^8.0.0" + } + }, + "node_modules/postcss-js": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/postcss-js/-/postcss-js-4.0.1.tgz", + "integrity": "sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==", + "dev": true, + "dependencies": { + "camelcase-css": "^2.0.1" + }, + "engines": { + "node": "^12 || ^14 || >= 16" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + "peerDependencies": { + "postcss": "^8.4.21" + } + }, + "node_modules/postcss-load-config": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", + "integrity": "sha512-bSVhyJGL00wMVoPUzAVAnbEoWyqRxkjv64tUl427SKnPrENtq6hJwUojroMz2VB+Q1edmi4IfrAPpami5VVgMQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "lilconfig": "^3.0.0", + "yaml": "^2.3.4" + }, + "engines": { + "node": ">= 14" + }, + "peerDependencies": { + "postcss": ">=8.0.9", + "ts-node": ">=9.0.0" + }, + "peerDependenciesMeta": { + "postcss": { + "optional": true + }, + "ts-node": { + "optional": true + } + } + }, + "node_modules/postcss-load-config/node_modules/lilconfig": { + "version": "3.1.2", + "resolved": 
"https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz", + "integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" + } + }, + "node_modules/postcss-nested": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/postcss-nested/-/postcss-nested-6.2.0.tgz", + "integrity": "sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "postcss-selector-parser": "^6.1.1" + }, + "engines": { + "node": ">=12.0" + }, + "peerDependencies": { + "postcss": "^8.2.14" + } + }, + "node_modules/postcss-selector-parser": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.1.tgz", + "integrity": "sha512-b4dlw/9V8A71rLIDsSwVmak9z2DuBUB7CA1/wSdelNEzqsjoSPeADTWNO09lpH49Diy3/JIZ2bSPB1dI3LJCHg==", + "dev": true, + "dependencies": { + "cssesc": "^3.0.0", + "util-deprecate": "^1.0.2" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", + "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", + "dev": true + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/prop-types": { + "version": "15.8.1", + "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", + "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", + "dependencies": { + "loose-envify": "^1.4.0", + "object-assign": "^4.1.1", + "react-is": "^16.13.1" + } + }, + "node_modules/property-information": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", + "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/react": { + "version": "18.3.1", + "resolved": 
"https://registry.npmjs.org/react/-/react-18.3.1.tgz", + "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "dependencies": { + "loose-envify": "^1.1.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-dom": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", + "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "dependencies": { + "loose-envify": "^1.1.0", + "scheduler": "^0.23.2" + }, + "peerDependencies": { + "react": "^18.3.1" + } + }, + "node_modules/react-is": { + "version": "16.13.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", + "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" + }, + "node_modules/react-markdown": { + "version": "8.0.6", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-8.0.6.tgz", + "integrity": "sha512-KgPWsYgHuftdx510wwIzpwf+5js/iHqBR+fzxefv8Khk3mFbnioF1bmL2idHN3ler0LMQmICKeDrWnZrX9mtbQ==", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/prop-types": "^15.0.0", + "@types/unist": "^2.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-whitespace": "^2.0.0", + "prop-types": "^15.0.0", + "property-information": "^6.0.0", + "react-is": "^18.0.0", + "remark-parse": "^10.0.0", + "remark-rehype": "^10.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-object": "^0.4.0", + "unified": "^10.0.0", + "unist-util-visit": "^4.0.0", + "vfile": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=16", + "react": ">=16" + } + }, + "node_modules/react-markdown/node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==" + }, + "node_modules/read-cache": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", + "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", + "dev": true, + "dependencies": { + "pify": "^2.3.0" + } + }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/reflect.getprototypeof": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz", + "integrity": "sha512-fmfw4XgoDke3kdI6h4xcUz1dG8uaiv5q9gcEwLS4Pnth2kxT+GZ7YehS1JTMGBQmtV7Y4GFGbs2re2NqhdozUg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.1", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "globalthis": "^1.0.3", + "which-builtin-type": "^1.1.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/regexp.prototype.flags": { + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz", + "integrity": 
"sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.6", + "define-properties": "^1.2.1", + "es-errors": "^1.3.0", + "set-function-name": "^2.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/remark-parse": { + "version": "10.0.2", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-10.0.2.tgz", + "integrity": "sha512-3ydxgHa/ZQzG8LvC7jTXccARYDcRld3VfcgIIFs7bI6vbRSxJJmzgLEIIoYKyrfhaY+ujuWaf/PJiMZXoiCXgw==", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-from-markdown": "^1.0.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-rehype": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-10.1.0.tgz", + "integrity": "sha512-EFmR5zppdBp0WQeDVZ/b66CWJipB2q2VLNFMabzDSGR66Z2fQii83G5gTBbgGEnEEA0QRussvrFHxk1HWGJskw==", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-to-hast": "^12.1.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/resolve": { + "version": "1.22.8", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", + "integrity": "sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==", + "dev": true, + "dependencies": { + "is-core-module": "^2.13.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", + "dev": true, + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob 
versions prior to v9 are no longer supported", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, + "node_modules/sade": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/sade/-/sade-1.8.1.tgz", + "integrity": "sha512-xal3CZX1Xlo/k4ApwCFrHVACi9fBqJ7V+mwhBsuf/1IOKbBy098Fex+Wa/5QMubw09pSZ/u8EY8PWgevJsXp1A==", + "dependencies": { + "mri": "^1.1.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/safe-array-concat": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.2.tgz", + "integrity": "sha512-vj6RsCsWBCf19jIeHEfkRMw8DPiBb+DMXklQ/1SGDHOMlHdPUkZXFQ2YdplS23zESTijAcurb1aSgJA3AgMu1Q==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "get-intrinsic": "^1.2.4", + "has-symbols": "^1.0.3", + "isarray": "^2.0.5" + }, + "engines": { + "node": ">=0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/safe-regex-test": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.3.tgz", + "integrity": "sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", + "is-regex": "^1.1.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/scheduler": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", + "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", + "dependencies": { + "loose-envify": "^1.1.0" + } + }, + "node_modules/semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/set-function-length": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", + "dev": true, + "dependencies": { + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "gopd": "^1.0.1", + "has-property-descriptors": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/set-function-name": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz", + "integrity": 
"sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==", + "dev": true, + "dependencies": { + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "functions-have-names": "^1.2.3", + "has-property-descriptors": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/source-map-js": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/stop-iteration-iterator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/stop-iteration-iterator/-/stop-iteration-iterator-1.0.0.tgz", + "integrity": "sha512-iCGQj+0l0HOdZ2AEeBADlsRC+vsnDsZsbdSiH1yNSjcfKM7fdpCMfqAL/dwF5BLiw/XhRft/Wax6zQbhq2BcjQ==", + "dev": true, + "dependencies": { + "internal-slot": "^1.0.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string-width": { + 
"version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dev": true, + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/string-width/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/string-width/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/string.prototype.includes": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.0.tgz", + "integrity": "sha512-E34CkBgyeqNDcrbU76cDjL5JLcVrtSdYq0MEh/B10r17pRP4ciHLwTgnuLV8Ay6cgEMLkcBkFCKyFZ43YldYzg==", + "dev": true, + "dependencies": { + "define-properties": "^1.1.3", + "es-abstract": "^1.17.5" + } + }, + "node_modules/string.prototype.matchall": { + "version": "4.0.11", + "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.11.tgz", + "integrity": "sha512-NUdh0aDavY2og7IbBPenWqR9exH+E26Sv8e0/eTe1tltDGZL+GtBkDAnnyBtmekfK6/Dq3MkcGtzXFEd1LQrtg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "get-intrinsic": "^1.2.4", + "gopd": "^1.0.1", + "has-symbols": "^1.0.3", + "internal-slot": "^1.0.7", + "regexp.prototype.flags": "^1.5.2", + "set-function-name": "^2.0.2", + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string.prototype.repeat": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/string.prototype.repeat/-/string.prototype.repeat-1.0.0.tgz", + "integrity": "sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w==", + "dev": true, + "dependencies": 
{ + "define-properties": "^1.1.3", + "es-abstract": "^1.17.5" + } + }, + "node_modules/string.prototype.trim": { + "version": "1.2.9", + "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.9.tgz", + "integrity": "sha512-klHuCNxiMZ8MlsOihJhJEBJAiMVqU3Z2nEXWfWnIqjN0gEFS9J9+IxKozWWtQGcgoa1WUZzLjKPTr4ZHNFTFxw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.0", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string.prototype.trimend": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.8.tgz", + "integrity": "sha512-p73uL5VCHCO2BZZ6krwwQE3kCzM7NKmis8S//xEC6fQonchbum4eP6kR4DLEjQFO3Wnj3Fuo8NM0kOSjVdHjZQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string.prototype.trimstart": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz", + "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/style-to-object": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.4.tgz", + "integrity": "sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==", + "dependencies": { + "inline-style-parser": "0.1.1" + } + }, + "node_modules/styled-jsx": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.1.tgz", + "integrity": 
"sha512-pW7uC1l4mBZ8ugbiZrcIsiIvVx1UmTfw7UkC3Um2tmfUq9Bhk8IiyEIPl6F8agHgjzku6j0xQEZbfA5uSgSaCw==", + "dependencies": { + "client-only": "0.0.1" + }, + "engines": { + "node": ">= 12.0.0" + }, + "peerDependencies": { + "react": ">= 16.8.0 || 17.x.x || ^18.0.0-0" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + }, + "babel-plugin-macros": { + "optional": true + } + } + }, + "node_modules/sucrase": { + "version": "3.35.0", + "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", + "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", + "dev": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.2", + "commander": "^4.0.0", + "glob": "^10.3.10", + "lines-and-columns": "^1.1.6", + "mz": "^2.7.0", + "pirates": "^4.0.1", + "ts-interface-checker": "^0.1.9" + }, + "bin": { + "sucrase": "bin/sucrase", + "sucrase-node": "bin/sucrase-node" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/tailwindcss": { + "version": "3.4.6", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.6.tgz", + "integrity": "sha512-1uRHzPB+Vzu57ocybfZ4jh5Q3SdlH7XW23J5sQoM9LhE9eIOlzxer/3XPSsycvih3rboRsvt0QCmzSrqyOYUIA==", + "dev": true, + "dependencies": { + "@alloc/quick-lru": "^5.2.0", + "arg": "^5.0.2", + "chokidar": "^3.5.3", + "didyoumean": "^1.2.2", + "dlv": "^1.1.3", + "fast-glob": "^3.3.0", + "glob-parent": "^6.0.2", + "is-glob": "^4.0.3", + "jiti": "^1.21.0", + "lilconfig": "^2.1.0", + "micromatch": "^4.0.5", + "normalize-path": "^3.0.0", + "object-hash": "^3.0.0", + "picocolors": "^1.0.0", + "postcss": "^8.4.23", + "postcss-import": "^15.1.0", + "postcss-js": "^4.0.1", + "postcss-load-config": "^4.0.1", + "postcss-nested": "^6.0.1", + "postcss-selector-parser": "^6.0.11", + "resolve": "^1.22.2", + "sucrase": "^3.32.0" + }, + "bin": { + "tailwind": "lib/cli.js", + "tailwindcss": "lib/cli.js" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tapable": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", + "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true + }, + "node_modules/thenify": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", + "integrity": 
"sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "dev": true, + "dependencies": { + "any-promise": "^1.0.0" + } + }, + "node_modules/thenify-all": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", + "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "dev": true, + "dependencies": { + "thenify": ">= 3.1.0 < 4" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/trough": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/ts-api-utils": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", + "integrity": "sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ==", + "dev": true, + "engines": { + "node": ">=16" + }, + "peerDependencies": { + "typescript": ">=4.2.0" + } + }, + "node_modules/ts-interface-checker": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", + "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", + "dev": true + }, + "node_modules/tsconfig-paths": { + "version": "3.15.0", + "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz", + "integrity": "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg==", + "dev": true, + "dependencies": { + "@types/json5": "^0.0.29", + "json5": "^1.0.2", + "minimist": "^1.2.6", + "strip-bom": "^3.0.0" + } + }, + "node_modules/tslib": { + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz", + "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==" + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/typed-array-buffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz", + "integrity": "sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "is-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/typed-array-byte-length": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.1.tgz", + "integrity": "sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/typed-array-byte-offset": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.2.tgz", + "integrity": "sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA==", + "dev": true, + "dependencies": { + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/typed-array-length": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.6.tgz", + "integrity": "sha512-/OxDN6OtAk5KBpGb28T+HZc2M+ADtvRxXrKKbUwtsLgdoxgX13hyy7ek6bFRl5+aBs2yZzB0c4CnQfAtVypW/g==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13", + "possible-typed-array-names": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/typescript": { + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.3.tgz", + "integrity": "sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/unbox-primitive": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", + "integrity": "sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "has-bigints": "^1.0.2", + "has-symbols": "^1.0.3", + "which-boxed-primitive": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + }, + "node_modules/unified": { + "version": "10.1.2", + "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", + "integrity": 
"sha512-pUSWAi/RAnVy1Pif2kAoeWNBa3JVrx0MId2LASj8G+7AiHWoKZNTomq6LG326T68U7/e263X6fTdcXIy7XnF7Q==", + "dependencies": { + "@types/unist": "^2.0.0", + "bail": "^2.0.0", + "extend": "^3.0.0", + "is-buffer": "^2.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-generated": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-2.0.1.tgz", + "integrity": "sha512-qF72kLmPxAw0oN2fwpWIqbXAVyEqUzDHMsbtPvOudIlUzXYFIeQIuxXQCRCFh22B7cixvU0MG7m3MW8FTq/S+A==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", + "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-4.0.4.tgz", + "integrity": "sha512-kUBE91efOWfIVBo8xzh/uZQ7p9ffYRtUbMRZBNFYwf0RK8koUMx6dGUfwylLOKmaT2cs4wSW96QoYUSXAyEtpg==", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-3.0.3.tgz", + "integrity": "sha512-k5GzIBZ/QatR8N5X2y+drfpWG8IDBzdnVj6OInRNWm1oXrzydiaAT2OQiA8DPRRZyAKb9b6I2a6PxYklZD0gKg==", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", + "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.1.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", + "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/update-browserslist-db": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz", + "integrity": "sha512-EdRAaAyk2cUE1wOf2DkEhzxqOQvFOoRJFNS6NeyJ01Gp2beMRpBAINjM2iDXE3KCuKhwnvHIQCJm6ThL2Z+HzQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], 
+ "dependencies": { + "escalade": "^3.1.2", + "picocolors": "^1.0.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "dev": true + }, + "node_modules/uvu": { + "version": "0.5.6", + "resolved": "https://registry.npmjs.org/uvu/-/uvu-0.5.6.tgz", + "integrity": "sha512-+g8ENReyr8YsOc6fv/NVJs2vFdHBnBNdfE49rshrTzDWOlUx4Gq7KOS2GD8eqhy2j+Ejq29+SbKH8yjkAqXqoA==", + "dependencies": { + "dequal": "^2.0.0", + "diff": "^5.0.0", + "kleur": "^4.0.3", + "sade": "^1.7.3" + }, + "bin": { + "uvu": "bin.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/vfile": { + "version": "5.3.7", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-5.3.7.tgz", + "integrity": "sha512-r7qlzkgErKjobAmyNIkkSpizsFPYiUPuJb5pNW1RB4JcYVZhs4lIbVqk8XPk033CV/1z8ss5pkax8SuhGpcG8g==", + "dependencies": { + "@types/unist": "^2.0.0", + "is-buffer": "^2.0.0", + "unist-util-stringify-position": "^3.0.0", + "vfile-message": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-3.1.4.tgz", + "integrity": "sha512-fa0Z6P8HUrQN4BZaX05SIVXic+7kE3b05PWAtPuYP9QLHsLKYR7/AlLW3NtOrpXRLeawpDLMsVkmk5DG0NXgWw==", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-stringify-position": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/which-boxed-primitive": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz", + "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==", + "dev": true, + "dependencies": { + "is-bigint": "^1.0.1", + "is-boolean-object": "^1.1.0", + "is-number-object": "^1.0.4", + "is-string": "^1.0.5", + "is-symbol": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/which-builtin-type": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/which-builtin-type/-/which-builtin-type-1.1.3.tgz", + "integrity": "sha512-YmjsSMDBYsM1CaFiayOVT06+KJeXf0o5M/CAd4o1lTadFAtacTUM49zoYxr/oroopFDfhvN6iEcBxUyc3gvKmw==", + "dev": true, + "dependencies": { + "function.prototype.name": "^1.1.5", + "has-tostringtag": "^1.0.0", + "is-async-function": "^2.0.0", + "is-date-object": "^1.0.5", + "is-finalizationregistry": "^1.0.2", + "is-generator-function": "^1.0.10", + "is-regex": "^1.1.4", + 
"is-weakref": "^1.0.2", + "isarray": "^2.0.5", + "which-boxed-primitive": "^1.0.2", + "which-collection": "^1.0.1", + "which-typed-array": "^1.1.9" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/which-collection": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz", + "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==", + "dev": true, + "dependencies": { + "is-map": "^2.0.3", + "is-set": "^2.0.3", + "is-weakmap": "^2.0.2", + "is-weakset": "^2.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/which-typed-array": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.15.tgz", + "integrity": "sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==", + "dev": true, + "dependencies": { + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-tostringtag": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": 
"sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "node_modules/yaml": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.5.tgz", + "integrity": "sha512-aBx2bnqDzVOyNKfsysjA2ms5ZlnjSAW2eG3/L5G/CSujfjLJTJsEw1bGw8kCf04KodQWk1pxlGnZ56CRxiawmg==", + "dev": true, + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/examples/widget/package.json b/examples/widget/package.json new file mode 100644 index 00000000000..4ceb4893084 --- /dev/null +++ b/examples/widget/package.json @@ -0,0 +1,28 @@ +{ + "name": "widget", + "version": "0.1.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint" + }, + "dependencies": { + "next": "14.2.5", + "react": "^18", + "react-dom": "^18", + "react-markdown": "^8.0.6" + }, + "devDependencies": { + "@types/node": "^20", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.4.19", + "eslint": "^8", + "eslint-config-next": "14.2.5", + "postcss": "^8.4.39", + "tailwindcss": "^3.4.6", + "typescript": "^5" + } +} diff --git a/examples/widget/postcss.config.mjs b/examples/widget/postcss.config.mjs new file mode 100644 index 00000000000..1a69fd2a450 --- /dev/null +++ b/examples/widget/postcss.config.mjs @@ -0,0 +1,8 @@ +/** @type {import('postcss-load-config').Config} */ +const config = { + plugins: { + tailwindcss: {}, + }, +}; + +export default config; diff --git a/examples/widget/src/app/globals.css b/examples/widget/src/app/globals.css new file mode 100644 index 00000000000..b5c61c95671 --- /dev/null +++ b/examples/widget/src/app/globals.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/examples/widget/src/app/layout.tsx b/examples/widget/src/app/layout.tsx new file mode 100644 index 00000000000..2cabefe9681 --- 
/dev/null
+++ b/examples/widget/src/app/layout.tsx
@@ -0,0 +1,23 @@
+import type { Metadata } from "next";
+import { Inter } from "next/font/google";
+
+import "./globals.css";
+
+const inter = Inter({ subsets: ["latin"] });
+
+export const metadata: Metadata = {
+  title: "Example Danswer Widget",
+  description: "Example Danswer Widget",
+};
+
+export default function RootLayout({
+  children,
+}: Readonly<{
+  children: React.ReactNode;
+}>) {
+  return (
+
+    {children}
+
+  );
+}
diff --git a/examples/widget/src/app/page.tsx b/examples/widget/src/app/page.tsx
new file mode 100644
index 00000000000..945cbee37db
--- /dev/null
+++ b/examples/widget/src/app/page.tsx
@@ -0,0 +1,9 @@
+import { ChatWidget } from "./widget/Widget";
+
+export default function Home() {
+  return (
+
+
+
+  );
+}
diff --git a/examples/widget/src/app/widget/Widget.tsx b/examples/widget/src/app/widget/Widget.tsx
new file mode 100644
index 00000000000..44654993c84
--- /dev/null
+++ b/examples/widget/src/app/widget/Widget.tsx
@@ -0,0 +1,344 @@
+"use client";
+
+import React, { useState } from "react";
+import ReactMarkdown from "react-markdown";
+
+const API_URL = process.env.NEXT_PUBLIC_API_URL || "http://localhost:8080";
+const API_KEY = process.env.NEXT_PUBLIC_API_KEY || "";
+
+type NonEmptyObject = { [k: string]: any };
+
+const processSingleChunk = <T extends NonEmptyObject>(
+  chunk: string,
+  currPartialChunk: string | null,
+): [T | null, string | null] => {
+  const completeChunk = (currPartialChunk || "") + chunk;
+  try {
+    // every complete chunk should be valid JSON
+    const chunkJson = JSON.parse(completeChunk);
+    return [chunkJson, null];
+  } catch (err) {
+    // if it's not valid JSON, then it's probably an incomplete chunk
+    return [null, completeChunk];
+  }
+};
+
+const processRawChunkString = <T extends NonEmptyObject>(
+  rawChunkString: string,
+  previousPartialChunk: string | null,
+): [T[], string | null] => {
+  /* This is required because, in practice, we see that nginx does not send over
+  each chunk one at a time even with buffering turned off. Instead,
+  chunks are sometimes in batches or are sometimes incomplete */
+  if (!rawChunkString) {
+    return [[], null];
+  }
+  const chunkSections = rawChunkString
+    .split("\n")
+    .filter((chunk) => chunk.length > 0);
+  let parsedChunkSections: T[] = [];
+  let currPartialChunk = previousPartialChunk;
+  chunkSections.forEach((chunk) => {
+    const [processedChunk, partialChunk] = processSingleChunk<T>(
+      chunk,
+      currPartialChunk,
+    );
+    if (processedChunk) {
+      parsedChunkSections.push(processedChunk);
+      currPartialChunk = null;
+    } else {
+      currPartialChunk = partialChunk;
+    }
+  });
+
+  return [parsedChunkSections, currPartialChunk];
+};
+
+async function* handleStream<T extends NonEmptyObject>(
+  streamingResponse: Response,
+): AsyncGenerator<T[], void, unknown> {
+  const reader = streamingResponse.body?.getReader();
+  const decoder = new TextDecoder("utf-8");
+
+  let previousPartialChunk: string | null = null;
+  while (true) {
+    const rawChunk = await reader?.read();
+    if (!rawChunk) {
+      throw new Error("Unable to process chunk");
+    }
+    const { done, value } = rawChunk;
+    if (done) {
+      break;
+    }
+
+    const [completedChunks, partialChunk] = processRawChunkString<T>(
+      decoder.decode(value, { stream: true }),
+      previousPartialChunk,
+    );
+    if (!completedChunks.length && !partialChunk) {
+      break;
+    }
+    previousPartialChunk = partialChunk as string | null;
+
+    yield await Promise.resolve(completedChunks);
+  }
+}
+
+async function* sendMessage({
+  message,
+  chatSessionId,
+  parentMessageId,
+}: {
+  message: string;
+  chatSessionId?: number;
+  parentMessageId?: number;
+}) {
+  if (!chatSessionId || !parentMessageId) {
+    // Create a new chat session if one doesn't exist
+    const createSessionResponse = await fetch(
+      `${API_URL}/chat/create-chat-session`,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${API_KEY}`,
+        },
+        body: JSON.stringify({
+          // or specify an assistant you have defined
+          persona_id: 0,
+        }),
+      },
+    );
+
+    if (!createSessionResponse.ok) {
+      const errorJson = await createSessionResponse.json();
+      const errorMsg = errorJson.message || errorJson.detail || "";
+      throw Error(`Failed to create chat session - ${errorMsg}`);
+    }
+
+    const sessionData = await createSessionResponse.json();
+    chatSessionId = sessionData.chat_session_id;
+  }
+
+  const sendMessageResponse = await
fetch(`${API_URL}/chat/send-message`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${API_KEY}`,
+    },
+    body: JSON.stringify({
+      chat_session_id: chatSessionId,
+      parent_message_id: parentMessageId,
+      message: message,
+      prompt_id: null,
+      search_doc_ids: null,
+      file_descriptors: [],
+      // check out https://github.com/danswer-ai/danswer/blob/main/backend/danswer/search/models.py#L105 for
+      // all available options
+      retrieval_options: {
+        run_search: "always",
+        filters: null,
+      },
+      query_override: null,
+    }),
+  });
+  if (!sendMessageResponse.ok) {
+    const errorJson = await sendMessageResponse.json();
+    const errorMsg = errorJson.message || errorJson.detail || "";
+    throw Error(`Failed to send message - ${errorMsg}`);
+  }
+
+  yield* handleStream(sendMessageResponse);
+}
+
+export const ChatWidget = () => {
+  const [messages, setMessages] = useState<{ text: string; isUser: boolean }[]>(
+    [],
+  );
+  const [inputText, setInputText] = useState("");
+  const [isLoading, setIsLoading] = useState(false);
+
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    if (inputText.trim()) {
+      const initialPrevMessages = messages;
+      setMessages([...initialPrevMessages, { text: inputText, isUser: true }]);
+      setInputText("");
+      setIsLoading(true);
+
+      try {
+        const messageGenerator = sendMessage({
+          message: inputText,
+          chatSessionId: undefined,
+          parentMessageId: undefined,
+        });
+        let fullResponse = "";
+
+        for await (const chunks of messageGenerator) {
+          for (const chunk of chunks) {
+            if ("answer_piece" in chunk) {
+              fullResponse += chunk.answer_piece;
+              setMessages([
+                ...initialPrevMessages,
+                { text: inputText, isUser: true },
+                { text: fullResponse, isUser: false },
+              ]);
+            }
+          }
+        }
+      } catch (error) {
+        console.error("Error sending message:", error);
+        setMessages((prevMessages) => [
+          ...prevMessages,
+          { text: "An error occurred. Please try again.", isUser: false },
+        ]);
+      } finally {
+        setIsLoading(false);
+      }
+    }
+  };
+
+  return (
+
+
+ Chat Support +
+
+ {messages.map((message, index) => ( +
+
+ {message.text} +
+
+ ))} + {isLoading && ( +
+
+
+
+
+
+
+ )} +
+
+
+ setInputText(e.target.value)} + placeholder="Type a message..." + className=" + w-full + p-2 + pr-10 + border + border-gray-300 + rounded-full + focus:outline-none + focus:ring-2 + focus:ring-blue-500 + focus:border-transparent + " + disabled={isLoading} + /> + +
+
+
+ ); +}; diff --git a/examples/widget/tailwind.config.ts b/examples/widget/tailwind.config.ts new file mode 100644 index 00000000000..e9a0944e7b3 --- /dev/null +++ b/examples/widget/tailwind.config.ts @@ -0,0 +1,20 @@ +import type { Config } from "tailwindcss"; + +const config: Config = { + content: [ + "./src/pages/**/*.{js,ts,jsx,tsx,mdx}", + "./src/components/**/*.{js,ts,jsx,tsx,mdx}", + "./src/app/**/*.{js,ts,jsx,tsx,mdx}", + ], + theme: { + extend: { + backgroundImage: { + "gradient-radial": "radial-gradient(var(--tw-gradient-stops))", + "gradient-conic": + "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))", + }, + }, + }, + plugins: [], +}; +export default config; diff --git a/examples/widget/tsconfig.json b/examples/widget/tsconfig.json new file mode 100644 index 00000000000..7b285893049 --- /dev/null +++ b/examples/widget/tsconfig.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [ + { + "name": "next" + } + ], + "paths": { + "@/*": ["./src/*"] + } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/web/Dockerfile b/web/Dockerfile index 585b1fb756a..4ffced0da49 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -46,6 +46,9 @@ ENV NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PRED ARG NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS ENV NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS} +ARG NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN +ENV NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN} + ARG NEXT_PUBLIC_THEME ENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME} @@ -106,6 +109,9 @@ ENV NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME} ARG NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED ENV NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED} +ARG NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN +ENV NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN} + ARG NEXT_PUBLIC_DISABLE_LOGOUT ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT} diff --git a/web/public/Clickup.svg b/web/public/Clickup.svg index 18f875d5cc8..21b8dab6c59 100644 --- a/web/public/Clickup.svg +++ b/web/public/Clickup.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/web/public/Cohere.svg b/web/public/Cohere.svg new file mode 100644 index 00000000000..42d4eb1845a --- /dev/null +++ b/web/public/Cohere.svg @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/public/Google.webp b/web/public/Google.webp new file mode 100644 index 00000000000..7b903159b0c Binary files /dev/null and b/web/public/Google.webp differ diff --git a/web/public/Mixedbread.png b/web/public/Mixedbread.png new file mode 100644 index 00000000000..5a2f720ca27 Binary files /dev/null and b/web/public/Mixedbread.png differ diff --git a/web/public/Openai.svg b/web/public/Openai.svg index e04db75a5bb..c0bcb8bc125 100644 --- a/web/public/Openai.svg +++ b/web/public/Openai.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/web/public/Voyage.png b/web/public/Voyage.png new file mode 100644 index 00000000000..63901a6e4e8 Binary 
files /dev/null and b/web/public/Voyage.png differ diff --git a/web/public/microsoft.png b/web/public/microsoft.png new file mode 100644 index 00000000000..11feffc1d82 Binary files /dev/null and b/web/public/microsoft.png differ diff --git a/web/public/nomic.svg b/web/public/nomic.svg new file mode 100644 index 00000000000..aa32de0381e --- /dev/null +++ b/web/public/nomic.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/web/public/r2.png b/web/public/r2.png new file mode 100644 index 00000000000..958b3bf4992 Binary files /dev/null and b/web/public/r2.png differ diff --git a/web/public/r2.webp b/web/public/r2.webp deleted file mode 100644 index d79134ce7d8..00000000000 Binary files a/web/public/r2.webp and /dev/null differ diff --git a/web/src/app/admin/add-connector/page.tsx b/web/src/app/admin/add-connector/page.tsx index d6ad8324ef3..bf7032b5f90 100644 --- a/web/src/app/admin/add-connector/page.tsx +++ b/web/src/app/admin/add-connector/page.tsx @@ -1,12 +1,20 @@ +"use client"; import { SourceIcon } from "@/components/SourceIcon"; import { AdminPageTitle } from "@/components/admin/Title"; import { ConnectorIcon } from "@/components/icons/icons"; import { SourceCategory, SourceMetadata } from "@/lib/search/interfaces"; import { listSourceMetadata } from "@/lib/sources"; -import { Title, Text } from "@tremor/react"; +import { Title, Text, Button } from "@tremor/react"; import Link from "next/link"; +import { useEffect, useMemo, useRef, useState } from "react"; -function SourceTile({ sourceMetadata }: { sourceMetadata: SourceMetadata }) { +function SourceTile({ + sourceMetadata, + preSelect, +}: { + sourceMetadata: SourceMetadata; + preSelect?: boolean; +}) { return ( @@ -30,61 +38,125 @@ function SourceTile({ sourceMetadata }: { sourceMetadata: SourceMetadata }) { ); } - export default function Page() { - const sources = listSourceMetadata(); + const sources = useMemo(() => listSourceMetadata(), []); + const [searchTerm, setSearchTerm] = useState(""); - const importedKnowledgeSources = sources.filter( - (source) => source.category === SourceCategory.ImportedKnowledge - ); - const appConnectionSources = sources.filter( - (source) => source.category === SourceCategory.AppConnection - ); + const searchInputRef = useRef(null); + + useEffect(() => { + if (searchInputRef.current) { + searchInputRef.current.focus(); + } + }, []); + const filterSources = (sources: SourceMetadata[]) => { + if (!searchTerm) return sources; + const lowerSearchTerm = searchTerm.toLowerCase(); + return sources.filter( + (source) => + source.displayName.toLowerCase().includes(lowerSearchTerm) || + source.category.toLowerCase().includes(lowerSearchTerm) + ); + }; + + const categorizedSources = useMemo(() => { + const filtered = filterSources(sources); + return Object.values(SourceCategory).reduce( + (acc, category) => { + acc[category] = sources.filter( + (source) => + source.category === category && + (filtered.includes(source) || + category.toLowerCase().includes(searchTerm.toLowerCase())) + ); + return acc; + }, + {} as Record + ); + }, [sources, searchTerm]); + const handleKeyPress = (e: React.KeyboardEvent) => { + if (e.key === "Enter") { + const filteredCategories = Object.entries(categorizedSources).filter( + ([_, sources]) => sources.length > 0 + ); + if ( + filteredCategories.length > 0 && + filteredCategories[0][1].length > 0 + ) { + const firstSource = filteredCategories[0][1][0]; + if (firstSource) { + window.open(firstSource.adminUrl, "_self"); + } + } + } + }; return (
} title="Add Connector" + farRightElement={ + + + + } /> - - Connect Danswer to your organization's knowledge sources. - We'll automatically sync your data into Danswer, so you can find - exactly what you're looking for in one place. - - -
- Import Knowledge -
- - Connect to pieces of knowledge that live outside your apps. Upload - files, scrape websites, or connect to your organization's Google - Site. - -
- {importedKnowledgeSources.map((source) => { - return ( - - ); - })} -
+ setSearchTerm(e.target.value)} + onKeyDown={handleKeyPress} + className="flex mt-2 max-w-sm h-9 w-full rounded-md border-2 border border-input bg-transparent px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" + /> -
- Setup Auto-Syncing from Apps -
- - Setup auto-syncing from your organization's most used apps and - services. Unless otherwise specified during the connector setup, we will - pull in the latest updates from the source every 10 minutes. - -
- {appConnectionSources.map((source) => { - return ( - - ); - })} -
+ {Object.entries(categorizedSources) + .filter(([_, sources]) => sources.length > 0) + .map(([category, sources], categoryInd) => ( +
+
+ {category} +
+ {getCategoryDescription(category as SourceCategory)} +
+ {sources.map((source, sourceInd) => ( + 0 && categoryInd == 0 && sourceInd == 0 + } + key={source.internalName} + sourceMetadata={source} + /> + ))} +
+
+ ))}
); } + +function getCategoryDescription(category: SourceCategory): string { + switch (category) { + case SourceCategory.Messaging: + return "Integrate with messaging and communication platforms."; + case SourceCategory.ProjectManagement: + return "Link to project management and task tracking tools."; + case SourceCategory.CustomerSupport: + return "Connect to customer support and helpdesk systems."; + case SourceCategory.CodeRepository: + return "Integrate with code repositories and version control systems."; + case SourceCategory.Storage: + return "Connect to cloud storage and file hosting services."; + case SourceCategory.Wiki: + return "Link to wiki and knowledge base platforms."; + case SourceCategory.Other: + return "Connect to other miscellaneous knowledge sources."; + default: + return "Connect to various knowledge sources."; + } +} diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx index 665af18743e..d478922e516 100644 --- a/web/src/app/admin/assistants/AssistantEditor.tsx +++ b/web/src/app/admin/assistants/AssistantEditor.tsx @@ -1,7 +1,10 @@ "use client"; -import { CCPairBasicInfo, DocumentSet, User, UserGroup } from "@/lib/types"; -import { Button, Divider, Italic, Text } from "@tremor/react"; +import { generateRandomIconShape, createSVG } from "@/lib/assistantIconUtils"; + +import { CCPairBasicInfo, DocumentSet, User, UserRole } from "@/lib/types"; +import { Button, Divider, Italic } from "@tremor/react"; +import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector"; import { ArrayHelpers, ErrorMessage, @@ -9,41 +12,52 @@ import { FieldArray, Form, Formik, + FormikProps, } from "formik"; -import * as Yup from "yup"; -import { buildFinalPrompt, createPersona, updatePersona } from "./lib"; -import { useRouter } from "next/navigation"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { Persona, StarterMessage } from "./interfaces"; -import Link from "next/link"; -import { useEffect, useState } from "react"; import { BooleanFormField, Label, SelectorFormField, TextFormField, } from "@/components/admin/connectors/Field"; -import CollapsibleSection from "./CollapsibleSection"; -import { FiInfo, FiPlus, FiX } from "react-icons/fi"; -import { useUserGroups } from "@/lib/hooks"; -import { Bubble } from "@/components/Bubble"; -import { GroupsIcon } from "@/components/icons/icons"; -import { SuccessfulPersonaUpdateRedirectType } from "./enums"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { getDisplayNameForModel } from "@/lib/hooks"; import { DocumentSetSelectable } from "@/components/documentSet/DocumentSetSelectable"; -import { FullLLMProvider } from "../models/llm/interfaces"; import { Option } from "@/components/Dropdown"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import { addAssistantToList } from "@/lib/assistants/updateAssistantPreferences"; +import { useUserGroups } from "@/lib/hooks"; +import { checkLLMSupportsImageInput, destructureValue } from "@/lib/llm/utils"; import { ToolSnapshot } from "@/lib/tools/interfaces"; import { checkUserIsNoAuthUser } from "@/lib/user"; -import { addAssistantToList } from "@/lib/assistants/updateAssistantPreferences"; -import { checkLLMSupportsImageInput } from "@/lib/llm/utils"; -import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; + import { - TooltipProvider, Tooltip, TooltipContent, + 
TooltipProvider, TooltipTrigger, } from "@radix-ui/react-tooltip"; +import Link from "next/link"; +import { useRouter } from "next/navigation"; +import { useEffect, useState } from "react"; +import { FiInfo, FiPlus, FiX } from "react-icons/fi"; +import * as Yup from "yup"; +import { FullLLMProvider } from "../configuration/llm/interfaces"; +import CollapsibleSection from "./CollapsibleSection"; +import { SuccessfulPersonaUpdateRedirectType } from "./enums"; +import { Persona, StarterMessage } from "./interfaces"; +import { buildFinalPrompt, createPersona, updatePersona } from "./lib"; +import { Popover } from "@/components/popover/Popover"; +import { + CameraIcon, + NewChatIcon, + SwapIcon, + TrashIcon, +} from "@/components/icons/icons"; +import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; +import { buildImgUrl } from "@/app/chat/files/images/utils"; +import { LlmList } from "@/components/llm/LLMList"; function findSearchTool(tools: ToolSnapshot[]) { return tools.find((tool) => tool.in_code_tool_id === "SearchTool"); @@ -58,7 +72,11 @@ function findInternetSearchTool(tools: ToolSnapshot[]) { } function SubLabel({ children }: { children: string | JSX.Element }) { - return
{children}
; + return ( +
+ {children} +
+ ); } export function AssistantEditor({ @@ -71,6 +89,7 @@ export function AssistantEditor({ llmProviders, tools, shouldAddAssistantToUserPreferences, + admin, }: { existingPersona?: Persona | null; ccPairs: CCPairBasicInfo[]; @@ -81,10 +100,38 @@ export function AssistantEditor({ llmProviders: FullLLMProvider[]; tools: ToolSnapshot[]; shouldAddAssistantToUserPreferences?: boolean; + admin?: boolean; }) { const router = useRouter(); const { popup, setPopup } = usePopup(); + const colorOptions = [ + "#FF6FBF", + "#6FB1FF", + "#B76FFF", + "#FFB56F", + "#6FFF8D", + "#FF6F6F", + "#6FFFFF", + ]; + + const [showAdvancedOptions, setShowAdvancedOptions] = useState(false); + + // state to persist across formik reformatting + const [defautIconColor, _setDeafultIconColor] = useState( + colorOptions[Math.floor(Math.random() * colorOptions.length)] + ); + + const [defaultIconShape, setDefaultIconShape] = useState(null); + + useEffect(() => { + if (defaultIconShape === null) { + setDefaultIconShape(generateRandomIconShape().encodedGrid); + } + }, []); + + const [isIconDropdownOpen, setIsIconDropdownOpen] = useState(false); + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); // EE only @@ -92,6 +139,7 @@ export function AssistantEditor({ const [finalPrompt, setFinalPrompt] = useState(""); const [finalPromptError, setFinalPromptError] = useState(""); + const [removePersonaImage, setRemovePersonaImage] = useState(false); const triggerFinalPromptUpdate = async ( systemPrompt: string, @@ -138,7 +186,7 @@ export function AssistantEditor({ llmProviders.forEach((llmProvider) => { const providerOptions = llmProvider.model_names.map((modelName) => { return { - name: modelName, + name: getDisplayNameForModel(modelName), value: modelName, }; }); @@ -192,16 +240,16 @@ export function AssistantEditor({ existingPersona?.llm_model_version_override ?? null, starter_messages: existingPersona?.starter_messages ?? [], enabled_tools_map: enabledToolsMap, - // search_tool_enabled: existingPersona - // ? personaCurrentToolIds.includes(searchTool!.id) - // : ccPairs.length > 0, - // image_generation_tool_enabled: imageGenerationTool - // ? personaCurrentToolIds.includes(imageGenerationTool.id) - // : false, + icon_color: existingPersona?.icon_color ?? defautIconColor, + icon_shape: existingPersona?.icon_shape ?? defaultIconShape, + uploaded_image: null, + // EE Only groups: existingPersona?.groups ?? [], }; + const [isRequestSuccessful, setIsRequestSuccessful] = useState(false); + return (
{popup} @@ -227,17 +275,26 @@ export function AssistantEditor({ llm_model_provider_override: Yup.string().nullable(), starter_messages: Yup.array().of( Yup.object().shape({ - name: Yup.string().required(), - description: Yup.string().required(), - message: Yup.string().required(), + name: Yup.string().required( + "Each starter message must have a name" + ), + description: Yup.string().required( + "Each starter message must have a description" + ), + message: Yup.string().required( + "Each starter message must have a message" + ), }) ), + icon_color: Yup.string(), + icon_shape: Yup.number(), + uploaded_image: Yup.mixed().nullable(), // EE Only groups: Yup.array().of(Yup.number()), }) .test( "system-prompt-or-task-prompt", - "Must provide either System Prompt or Additional Instructions", + "Must provide either Instructions or Reminders (Advanced)", function (values) { const systemPromptSpecified = values.system_prompt && values.system_prompt.trim().length > 0; @@ -251,7 +308,7 @@ export function AssistantEditor({ return this.createError({ path: "system_prompt", message: - "Must provide either System Prompt or Additional Instructions", + "Must provide either Instructions or Reminders (Advanced)", }); } )} @@ -277,7 +334,6 @@ export function AssistantEditor({ } formikHelpers.setSubmitting(true); - let enabledTools = Object.keys(values.enabled_tools_map) .map((toolId) => Number(toolId)) .filter((toolId) => values.enabled_tools_map[toolId]); @@ -324,6 +380,7 @@ export function AssistantEditor({ user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined, groups, tool_ids: enabledTools, + remove_image: removePersonaImage, }); } else { [promptResponse, personaResponse] = await createPersona({ @@ -381,10 +438,16 @@ export function AssistantEditor({ ? `/admin/assistants?u=${Date.now()}` : `/chat?assistantId=${assistantId}` ); + setIsRequestSuccessful(true); } }} > - {({ isSubmitting, values, setFieldValue }) => { + {({ + isSubmitting, + values, + setFieldValue, + ...formikProps + }: FormikProps) => { function toggleToolInValues(toolId: number) { const updatedEnabledToolsMap = { ...values.enabled_tools_map, @@ -400,62 +463,216 @@ export function AssistantEditor({ } return ( -
-
- - - { - setFieldValue("system_prompt", e.target.value); - triggerFinalPromptUpdate( - e.target.value, - values.task_prompt, - searchToolEnabled() - ); - }} - error={finalPromptError} - /> + +
+ setIsIconDropdownOpen(!isIconDropdownOpen)} + > + {values.uploaded_image ? ( + Uploaded assistant icon + ) : existingPersona?.uploaded_image_id && + !removePersonaImage ? ( + Uploaded assistant icon + ) : ( + createSVG( + { + encodedGrid: values.icon_shape, + filledSquares: 0, + }, + values.icon_color, + undefined, + true + ) + )} +
+ } + popover={ +
+ + + {values.uploaded_image && ( + + )} + + {!values.uploaded_image && + (!existingPersona?.uploaded_image_id || + removePersonaImage) && ( + + )} -
-
-
- LLM Provider{" "} + {existingPersona?.uploaded_image_id && + removePersonaImage && + !values.uploaded_image && ( + + )} + + {existingPersona?.uploaded_image_id && + !removePersonaImage && + !values.uploaded_image && ( + + )}
- - - - - - -

- Select a Large Language Model (Generative AI model) - to power this Assistant -

-
-
-
+ } + align="start" + side="bottom" + /> + + + + + + +

+ This icon will visually represent your Assistant +

+
+
+
+
+ + + + + + { + setFieldValue("system_prompt", e.target.value); + triggerFinalPromptUpdate( + e.target.value, + values.task_prompt, + searchToolEnabled() + ); + }} + error={finalPromptError} + /> + +
+
+
+ Default AI Model{" "}
+ + + + + + +

+ Select a Large Language Model (Generative AI model) to + power this Assistant +

+
+
+
+
+

+ Your assistant will use the user's set default unless + otherwise specified below. + {admin && + user?.preferences.default_model && + ` Your current (user-specific) default model is ${getDisplayNameForModel(destructureValue(user?.preferences?.default_model!).modelName)}`} +

+ {admin ? (
({ name: llmProvider.name, @@ -489,228 +706,317 @@ export function AssistantEditor({
)}
+ ) : ( +
+ { + if (value !== null) { + const { modelName, provider, name } = + destructureValue(value); + setFieldValue( + "llm_model_version_override", + modelName + ); + setFieldValue("llm_model_provider_override", name); + } else { + setFieldValue("llm_model_version_override", null); + setFieldValue("llm_model_provider_override", null); + } + }} + /> +
+ )} +
+
+
+
+ Capabilities{" "} +
+ + + + + + +

+ You can give your assistant advanced capabilities like + image generation +

+
+
+
+
+ Advanced +
-
-
-
- Capabilities{" "} -
+
+ {imageGenerationTool && ( - - + +
+ { + toggleToolInValues(imageGenerationTool.id); + }} + disabled={ + !checkLLMSupportsImageInput( + providerDisplayNameToProviderName.get( + values.llm_model_provider_override || "" + ) || "", + values.llm_model_version_override || "" + ) + } + /> +
- -

- You can give your assistant advanced capabilities - like image generation -

-
+ {!checkLLMSupportsImageInput( + providerDisplayNameToProviderName.get( + values.llm_model_provider_override || "" + ) || "", + values.llm_model_version_override || "" + ) && ( + +

+ To use Image Generation, select GPT-4o or another + image-compatible model as the default model for + this Assistant. 

+
+ )}
-
- Advanced -
-
+ )} -
- {imageGenerationTool && - checkLLMSupportsImageInput( - providerDisplayNameToProviderName.get( - values.llm_model_provider_override || "" - ) || - defaultProviderName || - "", - values.llm_model_version_override || - defaultModelName || - "" - ) && ( - { - toggleToolInValues(imageGenerationTool.id); - }} - /> - )} + {searchTool && ( + + + +
+ { + setFieldValue("num_chunks", null); + toggleToolInValues(searchTool.id); + }} + disabled={ccPairs.length === 0} + /> +
+
+ {ccPairs.length === 0 && ( + +

+ To use the Search Tool, you need to have at least + one Connector-Credential pair configured. +

+
+ )} +
+
+ )} - {ccPairs.length > 0 && searchTool && ( - <> - { - setFieldValue("num_chunks", null); - toggleToolInValues(searchTool.id); - }} - /> + {ccPairs.length > 0 && searchTool && ( + <> + {searchToolEnabled() && ( + +
+ {ccPairs.length > 0 && ( + <> + +
+ + <> + Select which{" "} + {!user || user.role === "admin" ? ( + + Document Sets + + ) : ( + "Document Sets" + )}{" "} + this Assistant should search through. If + none are specified, the Assistant will + search through all available documents in + order to try and respond to queries. + + +
- {searchToolEnabled() && ( - -
- {ccPairs.length > 0 && ( - <> - -
- + {documentSets.length > 0 ? ( + ( +
+
+ {documentSets.map((documentSet) => { + const ind = + values.document_set_ids.indexOf( + documentSet.id + ); + let isSelected = ind !== -1; + return ( + { + if (isSelected) { + arrayHelpers.remove(ind); + } else { + arrayHelpers.push( + documentSet.id + ); + } + }} + /> + ); + })} +
+
+ )} + /> + ) : ( + + No Document Sets available.{" "} + {user?.role !== "admin" && ( <> - Select which{" "} - {!user || user.role === "admin" ? ( - - Document Sets - - ) : ( - "Document Sets" - )}{" "} - that this Assistant should search - through. If none are specified, the - Assistant will search through all - available documents in order to try and - respond to queries. + If this functionality would be useful, + reach out to the administrators of + Danswer for assistance. -
-
+ )} + + )} - {documentSets.length > 0 ? ( - ( -
-
- {documentSets.map((documentSet) => { - const ind = - values.document_set_ids.indexOf( - documentSet.id - ); - let isSelected = ind !== -1; - return ( - { - if (isSelected) { - arrayHelpers.remove(ind); - } else { - arrayHelpers.push( - documentSet.id - ); - } - }} - /> - ); - })} -
-
- )} - /> - ) : ( - - No Document Sets available.{" "} - {user?.role !== "admin" && ( - <> - If this functionality would be useful, - reach out to the administrators of - Danswer for assistance. - - )} - - )} - -
- { - const value = e.target.value; - if ( - value === "" || - /^[0-9]+$/.test(value) - ) { - setFieldValue("num_chunks", value); - } - }} - /> - - + { + const value = e.target.value; + if ( + value === "" || + /^[0-9]+$/.test(value) + ) { + setFieldValue("num_chunks", value); } - /> - - + + + + -
- - )} -
-
- )} - - )} + /> +
+ + )} +
+ + )} + + )} - {internetSearchTool && ( - { - toggleToolInValues(internetSearchTool.id); - }} - /> - )} + {internetSearchTool && ( + { + toggleToolInValues(internetSearchTool.id); + }} + /> + )} - {customTools.length > 0 && ( - <> - {customTools.map((tool) => ( - { - toggleToolInValues(tool.id); - }} - /> - ))} - - )} -
+ {customTools.length > 0 && ( + <> + {customTools.map((tool) => ( + { + toggleToolInValues(tool.id); + }} + /> + ))} + + )} +
+
+ + + {showAdvancedOptions && ( + <> {llmProviders.length > 0 && ( <> - - { @@ -726,10 +1032,11 @@ export function AssistantEditor({ /> )} -
+ +
- Add Starter Messages (Optional){" "} + Starter Messages (Optional){" "}
{values.starter_messages && values.starter_messages.length > 0 && - values.starter_messages.map((_, index) => { - return ( -
-
-
-
- - - Shows up as the "title" for - this Starter Message. For example, - "Write an email". - - { + return ( +
+
+
+
+ + + Shows up as the "title" + for this Starter Message. For + example, "Write an email". + + - -
+ autoComplete="off" + /> + +
-
- - - A description which tells the user - what they might want to use this - Starter Message for. For example - "to a client about a new - feature" - - + + + A description which tells the user + what they might want to use this + Starter Message for. For example + "to a client about a new + feature" + + - -
+ autoComplete="off" + /> + +
-
- - - The actual message to be sent as the - initial user message if a user selects - this starter prompt. For example, - "Write me an email to a client - about a new billing feature we just - released." - - + + + The actual message to be sent as the + initial user message if a user + selects this starter prompt. For + example, "Write me an email to + a client about a new billing feature + we just released." + + - + +
+
+
+ + arrayHelpers.remove(index) + } />
-
- - arrayHelpers.remove(index) - } - /> -
-
- ); - })} + ); + } + )} @@ -874,79 +1186,31 @@ export function AssistantEditor({ {isPaidEnterpriseFeaturesEnabled && userGroups && - (!user || user.role === "admin") && ( - <> - - - - - {userGroups && - userGroups.length > 0 && - !values.is_public && ( -
- - Select which User Groups should have access to - this Assistant. - -
- {userGroups.map((userGroup) => { - const isSelected = values.groups.includes( - userGroup.id - ); - return ( - { - if (isSelected) { - setFieldValue( - "groups", - values.groups.filter( - (id) => id !== userGroup.id - ) - ); - } else { - setFieldValue("groups", [ - ...values.groups, - userGroup.id, - ]); - } - }} - > -
- -
- {userGroup.name} -
-
-
- ); - })} -
-
- )} - + userGroups.length > 0 && ( + )} + + )} -
- -
-
+
+
); diff --git a/web/src/app/admin/assistants/CollapsibleSection.tsx b/web/src/app/admin/assistants/CollapsibleSection.tsx index 72b598846e6..139f93d26e7 100644 --- a/web/src/app/admin/assistants/CollapsibleSection.tsx +++ b/web/src/app/admin/assistants/CollapsibleSection.tsx @@ -39,8 +39,9 @@ const CollapsibleSection: React.FC = ({ `} onClick={toggleCollapse} > + {" "} {isCollapsed ? ( - + {prompt}{" "} diff --git a/web/src/app/admin/assistants/PersonaTable.tsx b/web/src/app/admin/assistants/PersonaTable.tsx index 73fcbeb2b6a..aa35a0f17a9 100644 --- a/web/src/app/admin/assistants/PersonaTable.tsx +++ b/web/src/app/admin/assistants/PersonaTable.tsx @@ -5,12 +5,15 @@ import { Persona } from "./interfaces"; import { useRouter } from "next/navigation"; import { CustomCheckbox } from "@/components/CustomCheckbox"; import { usePopup } from "@/components/admin/connectors/Popup"; -import { useState } from "react"; +import { useState, useMemo, useEffect } from "react"; import { UniqueIdentifier } from "@dnd-kit/core"; import { DraggableTable } from "@/components/table/DraggableTable"; import { deletePersona, personaComparator } from "./lib"; import { FiEdit2 } from "react-icons/fi"; import { TrashIcon } from "@/components/icons/icons"; +import { getCurrentUser } from "@/lib/user"; +import { UserRole, User } from "@/lib/types"; +import { useUser } from "@/components/user/UserProvider"; function PersonaTypeDisplay({ persona }: { persona: Persona }) { if (persona.default_persona) { @@ -21,24 +24,58 @@ function PersonaTypeDisplay({ persona }: { persona: Persona }) { return Global; } + if (persona.groups.length > 0 || persona.users.length > 0) { + return Shared; + } + return Personal {persona.owner && <>({persona.owner.email})}; } -export function PersonasTable({ personas }: { personas: Persona[] }) { +const togglePersonaVisibility = async ( + personaId: number, + isVisible: boolean +) => { + const response = await fetch(`/api/admin/persona/${personaId}/visible`, { + method: "PATCH", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + is_visible: !isVisible, + }), + }); + return response; +}; + +export function PersonasTable({ + allPersonas, + editablePersonas, +}: { + allPersonas: Persona[]; + editablePersonas: Persona[]; +}) { const router = useRouter(); const { popup, setPopup } = usePopup(); - const availablePersonaIds = new Set( - personas.map((persona) => persona.id.toString()) + const { isLoadingUser, isAdmin } = useUser(); + + const editablePersonaIds = new Set( + editablePersonas.map((p) => p.id.toString()) ); - const sortedPersonas = [...personas]; - sortedPersonas.sort(personaComparator); + + const sortedPersonas = useMemo(() => { + const editable = editablePersonas.sort(personaComparator); + const nonEditable = allPersonas + .filter((p) => !editablePersonaIds.has(p.id.toString())) + .sort(personaComparator); + return [...editable, ...nonEditable]; + }, [allPersonas, editablePersonas]); const [finalPersonas, setFinalPersonas] = useState( sortedPersonas.map((persona) => persona.id.toString()) ); const finalPersonaValues = finalPersonas - .filter((id) => availablePersonaIds.has(id)) + .filter((id) => new Set(allPersonas.map((p) => p.id.toString())).has(id)) .map((id) => { return sortedPersonas.find( (persona) => persona.id.toString() === id @@ -71,6 +108,10 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { } }; + if (isLoadingUser) { + return <>; + } + return (
{popup} @@ -78,12 +119,14 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { Assistants will be displayed as options on the Chat / Search interfaces in the order they are displayed below. Assistants marked as hidden will - not be displayed. + not be displayed. Editable assistants are shown at the top. { + const isEditable = editablePersonaIds.has(persona.id.toString()); return { id: persona.id.toString(), cells: [ @@ -112,28 +155,22 @@ export function PersonasTable({ personas }: { personas: Persona[] }) {
{ - const response = await fetch( - `/api/admin/persona/${persona.id}/visible`, - { - method: "PATCH", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - is_visible: !persona.is_visible, - }), + if (isEditable) { + const response = await togglePersonaVisibility( + persona.id, + persona.is_visible + ); + if (response.ok) { + router.refresh(); + } else { + setPopup({ + type: "error", + message: `Failed to update persona - ${await response.text()}`, + }); } - ); - if (response.ok) { - router.refresh(); - } else { - setPopup({ - type: "error", - message: `Failed to update persona - ${await response.text()}`, - }); } }} - className="px-1 py-0.5 hover:bg-hover-light rounded flex cursor-pointer select-none w-fit" + className={`px-1 py-0.5 rounded flex ${isEditable ? "hover:bg-hover cursor-pointer" : ""} select-none w-fit`} >
{!persona.is_visible ? ( @@ -148,7 +185,7 @@ export function PersonasTable({ personas }: { personas: Persona[] }) {
,
- {!persona.default_persona ? ( + {!persona.default_persona && isEditable ? (
{ @@ -170,7 +207,7 @@ export function PersonasTable({ personas }: { personas: Persona[] }) {
, ], - staticModifiers: [[1, "lg:w-[300px] xl:w-[400px] 2xl:w-[550px]"]], + staticModifiers: [[1, "lg:w-[250px] xl:w-[400px] 2xl:w-[550px]"]], }; })} setRows={updatePersonaOrder} diff --git a/web/src/app/admin/assistants/[id]/page.tsx b/web/src/app/admin/assistants/[id]/page.tsx index 86b73536037..fab6f9f038b 100644 --- a/web/src/app/admin/assistants/[id]/page.tsx +++ b/web/src/app/admin/assistants/[id]/page.tsx @@ -41,11 +41,9 @@ export default async function Page({ params }: { params: { id: string } }) { } return ( -
+
- } /> - {body}
); diff --git a/web/src/app/admin/assistants/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts index 0a06ac4cc82..0696b5ae885 100644 --- a/web/src/app/admin/assistants/interfaces.ts +++ b/web/src/app/admin/assistants/interfaces.ts @@ -38,4 +38,7 @@ export interface Persona { default_persona: boolean; users: MinimalUserSnapshot[]; groups: number[]; + icon_shape?: number; + icon_color?: string; + uploaded_image_id?: string; } diff --git a/web/src/app/admin/assistants/lib.ts b/web/src/app/admin/assistants/lib.ts index 4d42789d810..613f98145f1 100644 --- a/web/src/app/admin/assistants/lib.ts +++ b/web/src/app/admin/assistants/lib.ts @@ -15,7 +15,11 @@ interface PersonaCreationRequest { starter_messages: StarterMessage[] | null; users?: string[]; groups: number[]; - tool_ids: number[]; // Added tool_ids to the interface + tool_ids: number[]; + icon_color: string | null; + icon_shape: number | null; + remove_image?: boolean; + uploaded_image: File | null; } interface PersonaUpdateRequest { @@ -35,7 +39,11 @@ interface PersonaUpdateRequest { starter_messages: StarterMessage[] | null; users?: string[]; groups: number[]; - tool_ids: number[]; // Added tool_ids to the interface + tool_ids: number[]; + icon_color: string | null; + icon_shape: number | null; + remove_image: boolean; + uploaded_image: File | null; } function promptNameFromPersonaName(personaName: string) { @@ -98,7 +106,8 @@ function updatePrompt({ function buildPersonaAPIBody( creationRequest: PersonaCreationRequest | PersonaUpdateRequest, - promptId: number + promptId: number, + uploaded_image_id: string | null ) { const { name, @@ -109,7 +118,10 @@ function buildPersonaAPIBody( is_public, groups, users, - tool_ids, // Added tool_ids to the destructuring + tool_ids, + icon_color, + icon_shape, + remove_image, } = creationRequest; return { @@ -127,10 +139,31 @@ function buildPersonaAPIBody( starter_messages: creationRequest.starter_messages, users, groups, - tool_ids, // Added tool_ids to the return object + tool_ids, + icon_color, + icon_shape, + uploaded_image_id, + remove_image, }; } +export async function uploadFile(file: File): Promise { + const formData = new FormData(); + formData.append("file", file); + const response = await fetch("/api/admin/persona/upload-image", { + method: "POST", + body: formData, + }); + + if (!response.ok) { + console.error("Failed to upload file"); + return null; + } + + const responseJson = await response.json(); + return responseJson.file_id; +} + export async function createPersona( personaCreationRequest: PersonaCreationRequest ): Promise<[Response, Response | null]> { @@ -145,6 +178,14 @@ export async function createPersona( ? (await createPromptResponse.json()).id : null; + let fileId = null; + if (personaCreationRequest.uploaded_image) { + fileId = await uploadFile(personaCreationRequest.uploaded_image); + if (!fileId) { + return [createPromptResponse, null]; + } + } + const createPersonaResponse = promptId !== null ? await fetch("/api/persona", { @@ -153,7 +194,7 @@ export async function createPersona( "Content-Type": "application/json", }, body: JSON.stringify( - buildPersonaAPIBody(personaCreationRequest, promptId) + buildPersonaAPIBody(personaCreationRequest, promptId, fileId) ), }) : null; @@ -188,6 +229,14 @@ export async function updatePersona( promptId = promptResponse.ok ? 
(await promptResponse.json()).id : null; } + let fileId = null; + if (personaUpdateRequest.uploaded_image) { + fileId = await uploadFile(personaUpdateRequest.uploaded_image); + if (!fileId) { + return [promptResponse, null]; + } + } + const updatePersonaResponse = promptResponse.ok && promptId ? await fetch(`/api/persona/${id}`, { @@ -196,7 +245,7 @@ export async function updatePersona( "Content-Type": "application/json", }, body: JSON.stringify( - buildPersonaAPIBody(personaUpdateRequest, promptId) + buildPersonaAPIBody(personaUpdateRequest, promptId, fileId) ), }) : null; diff --git a/web/src/app/admin/assistants/new/page.tsx b/web/src/app/admin/assistants/new/page.tsx index 5123dc4f4f0..c770056321f 100644 --- a/web/src/app/admin/assistants/new/page.tsx +++ b/web/src/app/admin/assistants/new/page.tsx @@ -20,6 +20,7 @@ export default async function Page() { @@ -28,7 +29,7 @@ export default async function Page() { } return ( -
+
); } - const personas = (await personaResponse.json()) as Persona[]; + const allPersonas = (await allPersonaResponse.json()) as Persona[]; + const editablePersonas = (await editablePersonaResponse.json()) as Persona[]; return (
- } title="Assistants" /> + } title="Assistants" /> Assistants are a way to build custom search/question-answering @@ -57,7 +64,10 @@ export default async function Page() { Existing Assistants - +
); diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx index 23968d585fe..4193c75d956 100644 --- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx +++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx @@ -79,13 +79,9 @@ export const SlackBotCreationForm = ({ existingSlackBotConfig?.channel_config?.respond_to_bots || false, enable_auto_filters: existingSlackBotConfig?.enable_auto_filters || false, - respond_member_group_list: ( + respond_member_group_list: existingSlackBotConfig?.channel_config - ?.respond_team_member_list ?? [] - ).concat( - existingSlackBotConfig?.channel_config - ?.respond_slack_group_list ?? [] - ), + ?.respond_member_group_list ?? [], still_need_help_enabled: existingSlackBotConfig?.channel_config?.follow_up_tags !== undefined, @@ -133,14 +129,7 @@ export const SlackBotCreationForm = ({ channel_names: values.channel_names.filter( (channelName) => channelName !== "" ), - respond_team_member_list: values.respond_member_group_list.filter( - (teamMemberEmail) => - /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(teamMemberEmail) - ), - respond_slack_group_list: values.respond_member_group_list.filter( - (slackGroupName) => - !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(slackGroupName) - ), + respond_member_group_list: values.respond_member_group_list, usePersona: usingPersonas, standard_answer_categories: values.standard_answer_categories.map( (category) => category.id @@ -257,13 +246,13 @@ export const SlackBotCreationForm = ({ /> @@ -306,14 +295,14 @@ export const SlackBotCreationForm = ({ [Optional] Data Sources and Prompts - Use either a Persona or Document Sets to control how - DanswerBot answers. + Use either an Assistant or Document Sets to control + how DanswerBot answers.
  • - You should use a Persona if you also want to customize - the prompt and retrieval settings. + You should use an Assistant if you also want to + customize the prompt and retrieval settings.
  • You should use Document Sets if you just want to control @@ -324,8 +313,9 @@ export const SlackBotCreationForm = ({ NOTE: whichever tab you are when you submit the form will be the one that is used. For example, if you are on the - "Personas" tab, then the Persona will be used, - even if you have Document Sets selected. + "Assistants" tab, then the Assistant and its + attached knowledge will be used, even if you have Document + Sets selected.
@@ -335,7 +325,7 @@ export const SlackBotCreationForm = ({ > Document Sets - Personas + Assistants @@ -396,7 +386,7 @@ export const SlackBotCreationForm = ({ { diff --git a/web/src/app/admin/bot/lib.ts b/web/src/app/admin/bot/lib.ts index b3d6ec678b3..c2d2b291502 100644 --- a/web/src/app/admin/bot/lib.ts +++ b/web/src/app/admin/bot/lib.ts @@ -14,8 +14,7 @@ interface SlackBotConfigCreationRequest { questionmark_prefilter_enabled: boolean; respond_tag_only: boolean; respond_to_bots: boolean; - respond_team_member_list: string[]; - respond_slack_group_list: string[]; + respond_member_group_list: string[]; follow_up_tags?: string[]; usePersona: boolean; response_type: SlackBotResponseType; @@ -43,8 +42,7 @@ const buildRequestBodyFromCreationRequest = ( respond_tag_only: creationRequest.respond_tag_only, respond_to_bots: creationRequest.respond_to_bots, enable_auto_filters: creationRequest.enable_auto_filters, - respond_team_member_list: creationRequest.respond_team_member_list, - respond_slack_group_list: creationRequest.respond_slack_group_list, + respond_member_group_list: creationRequest.respond_member_group_list, answer_filters: buildFiltersFromCreationRequest(creationRequest), follow_up_tags: creationRequest.follow_up_tags?.filter((tag) => tag !== ""), ...(creationRequest.usePersona diff --git a/web/src/app/admin/bot/page.tsx b/web/src/app/admin/bot/page.tsx index 8c1eca6781d..14f270ee9bc 100644 --- a/web/src/app/admin/bot/page.tsx +++ b/web/src/app/admin/bot/page.tsx @@ -301,7 +301,7 @@ const Page = () => { return (
} + icon={} title="Slack Bot Configuration" /> diff --git a/web/src/app/admin/models/llm/ConfiguredLLMProviderDisplay.tsx b/web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx similarity index 100% rename from web/src/app/admin/models/llm/ConfiguredLLMProviderDisplay.tsx rename to web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx diff --git a/web/src/app/admin/configuration/llm/CustomLLMProviderUpdateForm.tsx b/web/src/app/admin/configuration/llm/CustomLLMProviderUpdateForm.tsx new file mode 100644 index 00000000000..80ff1f456b9 --- /dev/null +++ b/web/src/app/admin/configuration/llm/CustomLLMProviderUpdateForm.tsx @@ -0,0 +1,532 @@ +import { LoadingAnimation } from "@/components/Loading"; +import { Button, Divider, Text } from "@tremor/react"; +import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; +import { + ArrayHelpers, + ErrorMessage, + Field, + FieldArray, + Form, + Formik, +} from "formik"; +import { FiPlus, FiTrash, FiX } from "react-icons/fi"; +import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; +import { + Label, + SubLabel, + TextArrayField, + TextFormField, + BooleanFormField, +} from "@/components/admin/connectors/Field"; +import { useState } from "react"; +import { Bubble } from "@/components/Bubble"; +import { GroupsIcon } from "@/components/icons/icons"; +import { useSWRConfig } from "swr"; +import { useUserGroups } from "@/lib/hooks"; +import { FullLLMProvider } from "./interfaces"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import * as Yup from "yup"; +import isEqual from "lodash/isEqual"; + +function customConfigProcessing(customConfigsList: [string, string][]) { + const customConfig: { [key: string]: string } = {}; + customConfigsList.forEach(([key, value]) => { + customConfig[key] = value; + }); + return customConfig; +} + +export function CustomLLMProviderUpdateForm({ + onClose, + existingLlmProvider, + shouldMarkAsDefault, + setPopup, +}: { + onClose: () => void; + existingLlmProvider?: FullLLMProvider; + shouldMarkAsDefault?: boolean; + setPopup?: (popup: PopupSpec) => void; +}) { + const { mutate } = useSWRConfig(); + + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); + + // EE only + const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); + + const [isTesting, setIsTesting] = useState(false); + const [testError, setTestError] = useState(""); + + const [showAdvancedOptions, setShowAdvancedOptions] = useState(false); + + // Define the initial values based on the provider's requirements + const initialValues = { + name: existingLlmProvider?.name ?? "", + provider: existingLlmProvider?.provider ?? "", + api_key: existingLlmProvider?.api_key ?? "", + api_base: existingLlmProvider?.api_base ?? "", + api_version: existingLlmProvider?.api_version ?? "", + default_model_name: existingLlmProvider?.default_model_name ?? null, + fast_default_model_name: + existingLlmProvider?.fast_default_model_name ?? null, + model_names: existingLlmProvider?.model_names ?? [], + custom_config_list: existingLlmProvider?.custom_config + ? Object.entries(existingLlmProvider.custom_config) + : [], + is_public: existingLlmProvider?.is_public ?? true, + groups: existingLlmProvider?.groups ?? 
[], + }; + + // Setup validation schema if required + const validationSchema = Yup.object({ + name: Yup.string().required("Display Name is required"), + provider: Yup.string().required("Provider Name is required"), + api_key: Yup.string(), + api_base: Yup.string(), + api_version: Yup.string(), + model_names: Yup.array(Yup.string().required("Model name is required")), + default_model_name: Yup.string().required("Model name is required"), + fast_default_model_name: Yup.string().nullable(), + custom_config_list: Yup.array(), + // EE Only + is_public: Yup.boolean().required(), + groups: Yup.array().of(Yup.number()), + }); + + return ( + { + setSubmitting(true); + + if (values.model_names.length === 0) { + const fullErrorMsg = "At least one model name is required"; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + setSubmitting(false); + return; + } + + // don't set groups if marked as public + const groups = values.is_public ? [] : values.groups; + + // test the configuration + if (!isEqual(values, initialValues)) { + setIsTesting(true); + + const response = await fetch("/api/admin/llm/test", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + custom_config: customConfigProcessing(values.custom_config_list), + ...values, + }), + }); + setIsTesting(false); + + if (!response.ok) { + const errorMsg = (await response.json()).detail; + setTestError(errorMsg); + return; + } + } + + const response = await fetch(LLM_PROVIDERS_ADMIN_URL, { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + ...values, + custom_config: customConfigProcessing(values.custom_config_list), + }), + }); + + if (!response.ok) { + const errorMsg = (await response.json()).detail; + const fullErrorMsg = existingLlmProvider + ? `Failed to update provider: ${errorMsg}` + : `Failed to enable provider: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + + if (shouldMarkAsDefault) { + const newLlmProvider = (await response.json()) as FullLLMProvider; + const setDefaultResponse = await fetch( + `${LLM_PROVIDERS_ADMIN_URL}/${newLlmProvider.id}/default`, + { + method: "POST", + } + ); + if (!setDefaultResponse.ok) { + const errorMsg = (await setDefaultResponse.json()).detail; + const fullErrorMsg = `Failed to set provider as default: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + } + + mutate(LLM_PROVIDERS_ADMIN_URL); + onClose(); + + const successMsg = existingLlmProvider + ? "Provider updated successfully!" + : "Provider enabled successfully!"; + if (setPopup) { + setPopup({ + type: "success", + message: successMsg, + }); + } else { + alert(successMsg); + } + + setSubmitting(false); + }} + > + {({ values, setFieldValue }) => { + return ( +
+ + + + Should be one of the providers listed at{" "} + + https://docs.litellm.ai/docs/providers + + . + + } + placeholder="Name of the custom provider" + /> + + + + + Fill in the following as is needed. Refer to the LiteLLM + documentation for the model provider name specified above in order + to determine which fields are required. + + + + + + + + + + + <> +
+ Additional configurations needed by the model provider. These are + passed to litellm via environment variables. +
+ +
+ For example, when configuring the Cloudflare provider, you + would need to set `CLOUDFLARE_ACCOUNT_ID` as the key and your + Cloudflare account ID as the value. +
+ +
+ + ) => ( +
+ {values.custom_config_list.map((_, index) => { + return ( +
+
+
+
+ + + +
+ +
+ + + +
+
+
+ arrayHelpers.remove(index)} + /> +
+
+
+ ); + })} + + +
+ )} + /> + + + + + List the individual models that you want to make available as + a part of this provider. At least one must be specified. For + the best experience your [Provider Name]/[Model Name] should + match one of the pairs listed{" "} +
+ here + + . + + } + /> + + + + + + + + + + + + {showAdvancedOptions && ( + <> + {isPaidEnterpriseFeaturesEnabled && userGroups && ( + <> + + + {userGroups && + userGroups.length > 0 && + !values.is_public && ( +
+ + Select which User Groups should have access to this + LLM Provider. + +
+ {userGroups.map((userGroup) => { + const isSelected = values.groups.includes( + userGroup.id + ); + return ( + { + if (isSelected) { + setFieldValue( + "groups", + values.groups.filter( + (id) => id !== userGroup.id + ) + ); + } else { + setFieldValue("groups", [ + ...values.groups, + userGroup.id, + ]); + } + }} + > +
+ +
{userGroup.name}
+
+
+ ); + })} +
+
+ )} + + )} + + )} + +
+ {/* NOTE: this is above the test button to make sure it's visible */} + {testError && ( + {testError} + )} + +
+ + {existingLlmProvider && ( + + )} +
+
+ + ); + }} + + ); +} diff --git a/web/src/app/admin/models/llm/LLMConfiguration.tsx b/web/src/app/admin/configuration/llm/LLMConfiguration.tsx similarity index 100% rename from web/src/app/admin/models/llm/LLMConfiguration.tsx rename to web/src/app/admin/configuration/llm/LLMConfiguration.tsx diff --git a/web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx b/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx similarity index 69% rename from web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx rename to web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx index 2e048d41ba8..49d95d096f5 100644 --- a/web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx +++ b/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx @@ -1,4 +1,5 @@ import { LoadingAnimation } from "@/components/Loading"; +import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; import { Button, Divider, Text } from "@tremor/react"; import { Form, Formik } from "formik"; import { FiTrash } from "react-icons/fi"; @@ -6,11 +7,21 @@ import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; import { SelectorFormField, TextFormField, + BooleanFormField, + MultiSelectField, } from "@/components/admin/connectors/Field"; import { useState } from "react"; +import { Bubble } from "@/components/Bubble"; +import { GroupsIcon } from "@/components/icons/icons"; import { useSWRConfig } from "swr"; +import { + defaultModelsByProvider, + getDisplayNameForModel, + useUserGroups, +} from "@/lib/hooks"; import { FullLLMProvider, WellKnownLLMProviderDescriptor } from "./interfaces"; import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; import * as Yup from "yup"; import isEqual from "lodash/isEqual"; @@ -29,9 +40,16 @@ export function LLMProviderUpdateForm({ }) { const { mutate } = useSWRConfig(); + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); + + // EE only + const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); + const [isTesting, setIsTesting] = useState(false); const [testError, setTestError] = useState(""); + const [showAdvancedOptions, setShowAdvancedOptions] = useState(false); + // Define the initial values based on the provider's requirements const initialValues = { name: existingLlmProvider?.name ?? "", @@ -54,12 +72,14 @@ export function LLMProviderUpdateForm({ }, {} as { [key: string]: string } ), + is_public: existingLlmProvider?.is_public ?? true, + groups: existingLlmProvider?.groups ?? [], + display_model_names: + existingLlmProvider?.display_model_names || + defaultModelsByProvider[llmProviderDescriptor.name] || + [], }; - const [validatedConfig, setValidatedConfig] = useState( - existingLlmProvider ? initialValues : null - ); - // Setup validation schema if required const validationSchema = Yup.object({ name: Yup.string().required("Display Name is required"), @@ -91,6 +111,10 @@ export function LLMProviderUpdateForm({ : {}), default_model_name: Yup.string().required("Model name is required"), fast_default_model_name: Yup.string().nullable(), + // EE Only + is_public: Yup.boolean().required(), + groups: Yup.array().of(Yup.number()), + display_model_names: Yup.array().of(Yup.string()), }); return ( @@ -193,8 +217,8 @@ export function LLMProviderUpdateForm({ setSubmitting(false); }} > - {({ values }) => ( -
+ {({ values, setFieldValue }) => ( + - - {llmProviderDescriptor.api_key_required && ( ({ - name, + name: getDisplayNameForModel(name), value: name, }))} maxHeight="max-h-56" @@ -274,7 +296,7 @@ export function LLMProviderUpdateForm({ the Default Model configured above.`} label="[Optional] Fast Model" options={llmProviderDescriptor.llm_names.map((name) => ({ - name, + name: getDisplayNameForModel(name), value: name, }))} includeDefault @@ -293,6 +315,89 @@ export function LLMProviderUpdateForm({ + {llmProviderDescriptor.name != "azure" && ( + + )} + + {showAdvancedOptions && ( + <> + {llmProviderDescriptor.llm_names.length > 0 && ( +
+ ({ + value: name, + label: getDisplayNameForModel(name), + }))} + onChange={(selected) => + setFieldValue("display_model_names", selected) + } + /> +
+ )} + + {isPaidEnterpriseFeaturesEnabled && userGroups && ( + <> + + + {userGroups && userGroups.length > 0 && !values.is_public && ( +
+ + Select which User Groups should have access to this LLM + Provider. + +
+ {userGroups.map((userGroup) => { + const isSelected = values.groups.includes( + userGroup.id + ); + return ( + { + if (isSelected) { + setFieldValue( + "groups", + values.groups.filter( + (id) => id !== userGroup.id + ) + ); + } else { + setFieldValue("groups", [ + ...values.groups, + userGroup.id, + ]); + } + }} + > +
+ +
{userGroup.name}
+
+
+ ); + })} +
+
+ )} + + )} + + )}
{/* NOTE: this is above the test button to make sure it's visible */} {testError && {testError}} diff --git a/web/src/app/admin/configuration/llm/constants.ts b/web/src/app/admin/configuration/llm/constants.ts new file mode 100644 index 00000000000..a265f4a2b2d --- /dev/null +++ b/web/src/app/admin/configuration/llm/constants.ts @@ -0,0 +1,4 @@ +export const LLM_PROVIDERS_ADMIN_URL = "/api/admin/llm/provider"; + +export const EMBEDDING_PROVIDERS_ADMIN_URL = + "/api/admin/embedding/embedding-provider"; diff --git a/web/src/app/admin/models/llm/interfaces.ts b/web/src/app/admin/configuration/llm/interfaces.ts similarity index 84% rename from web/src/app/admin/models/llm/interfaces.ts rename to web/src/app/admin/configuration/llm/interfaces.ts index 2175a273125..2d0d49196b4 100644 --- a/web/src/app/admin/models/llm/interfaces.ts +++ b/web/src/app/admin/configuration/llm/interfaces.ts @@ -17,6 +17,8 @@ export interface WellKnownLLMProviderDescriptor { llm_names: string[]; default_model: string | null; default_fast_model: string | null; + is_public: boolean; + groups: number[]; } export interface LLMProvider { @@ -28,6 +30,9 @@ export interface LLMProvider { custom_config: { [key: string]: string } | null; default_model_name: string; fast_default_model_name: string | null; + is_public: boolean; + groups: number[]; + display_model_names: string[] | null; } export interface FullLLMProvider extends LLMProvider { @@ -44,4 +49,7 @@ export interface LLMProviderDescriptor { default_model_name: string; fast_default_model_name: string | null; is_default_provider: boolean | null; + is_public: boolean; + groups: number[]; + display_model_names: string[] | null; } diff --git a/web/src/app/admin/models/llm/page.tsx b/web/src/app/admin/configuration/llm/page.tsx similarity index 77% rename from web/src/app/admin/models/llm/page.tsx rename to web/src/app/admin/configuration/llm/page.tsx index a774b36966d..9771a53c3af 100644 --- a/web/src/app/admin/models/llm/page.tsx +++ b/web/src/app/admin/configuration/llm/page.tsx @@ -3,13 +3,14 @@ import { AdminPageTitle } from "@/components/admin/Title"; import { FiCpu } from "react-icons/fi"; import { LLMConfiguration } from "./LLMConfiguration"; +import { CpuIcon } from "@/components/icons/icons"; const Page = () => { return (
} + icon={} /> diff --git a/web/src/app/admin/configuration/search/UpgradingPage.tsx b/web/src/app/admin/configuration/search/UpgradingPage.tsx new file mode 100644 index 00000000000..da379656336 --- /dev/null +++ b/web/src/app/admin/configuration/search/UpgradingPage.tsx @@ -0,0 +1,116 @@ +import { ThreeDotsLoader } from "@/components/Loading"; +import { Modal } from "@/components/Modal"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ConnectorIndexingStatus } from "@/lib/types"; +import { Button, Text, Title } from "@tremor/react"; +import { useState } from "react"; +import useSWR, { mutate } from "swr"; +import { ReindexingProgressTable } from "../../../../components/embedding/ReindexingProgressTable"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { + CloudEmbeddingModel, + HostedEmbeddingModel, +} from "../../../../components/embedding/interfaces"; +import { Connector } from "@/lib/connectors/connectors"; + +export default function UpgradingPage({ + futureEmbeddingModel, +}: { + futureEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel; +}) { + const [isCancelling, setIsCancelling] = useState(false); + + const { data: connectors } = useSWR[]>( + "/api/manage/connector", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const { + data: ongoingReIndexingStatus, + isLoading: isLoadingOngoingReIndexingStatus, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status?secondary_index=true", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const onCancel = async () => { + const response = await fetch("/api/search-settings/cancel-new-embedding", { + method: "POST", + }); + if (response.ok) { + mutate("/api/search-settings/get-secondary-search-settings"); + } else { + alert( + `Failed to cancel embedding model update - ${await response.text()}` + ); + } + setIsCancelling(false); + }; + + return ( + <> + {isCancelling && ( + setIsCancelling(false)} + title="Cancel Embedding Model Switch" + > +
+
+ Are you sure you want to cancel? +
+
+ Cancelling will revert to the previous model and all progress will + be lost. +
+
+ +
+
+
+ )} + + {futureEmbeddingModel && connectors && connectors.length > 0 && ( +
+ Current Upgrade Status +
+
+ Currently in the process of switching to:{" "} + {futureEmbeddingModel.model_name} +
+ + + + + The table below shows the re-indexing progress of all existing + connectors. Once all connectors have been re-indexed successfully, + the new model will be used for all search queries. Until then, we + will use the old model so that no downtime is necessary during + this transition. + + + {isLoadingOngoingReIndexingStatus ? ( + + ) : ongoingReIndexingStatus ? ( + + ) : ( + + )} +
+
+ )} + + ); +} diff --git a/web/src/app/admin/configuration/search/page.tsx b/web/src/app/admin/configuration/search/page.tsx new file mode 100644 index 00000000000..b2abebae725 --- /dev/null +++ b/web/src/app/admin/configuration/search/page.tsx @@ -0,0 +1,191 @@ +"use client"; + +import { ThreeDotsLoader } from "@/components/Loading"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { Button, Card, Text, Title } from "@tremor/react"; +import useSWR from "swr"; +import { ModelPreview } from "../../../../components/embedding/ModelSelector"; +import { + AVAILABLE_CLOUD_PROVIDERS, + HostedEmbeddingModel, + CloudEmbeddingModel, + AVAILABLE_MODELS, +} from "@/components/embedding/interfaces"; + +import { ErrorCallout } from "@/components/ErrorCallout"; + +export interface EmbeddingDetails { + api_key: string; + custom_config: any; + default_model_id?: number; + name: string; +} +import { EmbeddingIcon } from "@/components/icons/icons"; + +import Link from "next/link"; +import { SavedSearchSettings } from "../../embeddings/interfaces"; +import UpgradingPage from "./UpgradingPage"; +import { useContext } from "react"; +import { SettingsContext } from "@/components/settings/SettingsProvider"; + +function Main() { + const settings = useContext(SettingsContext); + const { + data: currentEmeddingModel, + isLoading: isLoadingCurrentModel, + error: currentEmeddingModelError, + } = useSWR( + "/api/search-settings/get-current-search-settings", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const { data: searchSettings, isLoading: isLoadingSearchSettings } = + useSWR( + "/api/search-settings/get-current-search-settings", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const { + data: futureEmbeddingModel, + isLoading: isLoadingFutureModel, + error: futureEmeddingModelError, + } = useSWR( + "/api/search-settings/get-secondary-search-settings", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + if ( + isLoadingCurrentModel || + isLoadingFutureModel || + isLoadingSearchSettings + ) { + return ; + } + + if ( + currentEmeddingModelError || + !currentEmeddingModel || + futureEmeddingModelError + ) { + return ; + } + + const currentModelName = currentEmeddingModel?.model_name; + const AVAILABLE_CLOUD_PROVIDERS_FLATTENED = AVAILABLE_CLOUD_PROVIDERS.flatMap( + (provider) => + provider.embedding_models.map((model) => ({ + ...model, + provider_type: provider.provider_type, + model_name: model.model_name, // Ensure model_name is set for consistency + })) + ); + + const currentModel: CloudEmbeddingModel | HostedEmbeddingModel = + AVAILABLE_MODELS.find((model) => model.model_name === currentModelName) || + AVAILABLE_CLOUD_PROVIDERS_FLATTENED.find( + (model) => model.model_name === currentEmeddingModel.model_name + )!; + + return ( +
+ {!futureEmbeddingModel ? ( + <> + {settings?.settings.needs_reindexing && ( +

+ Your search settings are currently out of date! We recommend + updating your search settings and re-indexing. +

+ )} + Embedding Model + + {currentModel ? ( + + ) : ( + Choose your Embedding Model + )} + + Post-processing + + + {searchSettings && ( + <> +
+
+
+ Reranking Model + + {searchSettings.rerank_model_name || "Not set"} + +
+ +
+ Results to Rerank + + {searchSettings.num_rerank} + +
+ +
+ + Multilingual Expansion + + + {searchSettings.multilingual_expansion.length > 0 + ? searchSettings.multilingual_expansion.join(", ") + : "None"} + +
+ +
+ Multipass Indexing + + {searchSettings.multipass_indexing + ? "Enabled" + : "Disabled"} + +
+ +
+ + Disable Reranking for Streaming + + + {searchSettings.disable_rerank_for_streaming + ? "Yes" + : "No"} + +
+
+
+ + )} +
+ + + + + + ) : ( + + )} +
+ ); +} + +function Page() { + return ( +
+ } + /> +
+
+ ); +} + +export default Page; diff --git a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx index c959b903754..7bd42947116 100644 --- a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx @@ -38,6 +38,67 @@ function buildConfigEntries( return obj; } +export function AdvancedConfigDisplay({ + pruneFreq, + refreshFreq, + indexingStart, +}: { + pruneFreq: number | null; + refreshFreq: number | null; + indexingStart: Date | null; +}) { + const formatRefreshFrequency = (seconds: number | null): string => { + if (seconds === null) return "-"; + const minutes = Math.round(seconds / 60); + return `${minutes} minute${minutes !== 1 ? "s" : ""}`; + }; + const formatPruneFrequency = (seconds: number | null): string => { + if (seconds === null) return "-"; + const days = Math.round(seconds / (60 * 60 * 24)); + return `${days} day${days !== 1 ? "s" : ""}`; + }; + + const formatDate = (date: Date | null): string => { + if (date === null) return "-"; + return date.toLocaleString("en-US", { + year: "numeric", + month: "long", + day: "numeric", + hour: "2-digit", + minute: "2-digit", + timeZoneName: "short", + }); + }; + + return ( + <> + Advanced Configuration + + + {pruneFreq && ( + + Pruning Frequency + {formatPruneFrequency(pruneFreq)} + + )} + {refreshFreq && ( + + Refresh Frequency + {formatRefreshFrequency(refreshFreq)} + + )} + {indexingStart && ( + + Indexing Start + {formatDate(indexingStart)} + + )} + + + + ); +} + export function ConfigDisplay({ connectorSpecificConfig, sourceType, diff --git a/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx b/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx index 8e15709bc22..7ea03747bd3 100644 --- a/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx +++ b/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx @@ -1,7 +1,7 @@ "use client"; import { Button } from "@tremor/react"; -import { CCPairFullInfo } from "./types"; +import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types"; import { usePopup } from "@/components/admin/connectors/Popup"; import { FiTrash } from "react-icons/fi"; import { deleteCCPair } from "@/lib/documentDeletion"; @@ -16,7 +16,7 @@ export function DeletionButton({ ccPair }: { ccPair: CCPairFullInfo }) { ccPair?.latest_deletion_attempt?.status === "STARTED"; let tooltip: string; - if (ccPair.connector.disabled) { + if (ccPair.status !== ConnectorCredentialPairStatus.ACTIVE) { if (isDeleting) { tooltip = "This connector is currently being deleted"; } else { @@ -41,10 +41,12 @@ export function DeletionButton({ ccPair }: { ccPair: CCPairFullInfo }) { ) } icon={FiTrash} - disabled={!ccPair.connector.disabled || isDeleting} + disabled={ + ccPair.status === ConnectorCredentialPairStatus.ACTIVE || isDeleting + } tooltip={tooltip} > - Schedule for Deletion + Delete
); diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx index eeca261979d..b9861a29759 100644 --- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx +++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx @@ -18,7 +18,9 @@ import { PageSelector } from "@/components/PageSelector"; import { localizeAndPrettify } from "@/lib/time"; import { getDocsProcessedPerMinute } from "@/lib/indexAttempt"; import { Modal } from "@/components/Modal"; -import { CheckmarkIcon, CopyIcon } from "@/components/icons/icons"; +import { CheckmarkIcon, CopyIcon, SearchIcon } from "@/components/icons/icons"; +import Link from "next/link"; +import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal"; const NUM_IN_PAGE = 8; @@ -36,44 +38,12 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { <> {indexAttemptToDisplayTraceFor && indexAttemptToDisplayTraceFor.full_exception_trace && ( - setIndexAttemptTracePopupId(null)} - > -
-
- {!copyClicked ? ( -
{ - navigator.clipboard.writeText( - indexAttemptToDisplayTraceFor.full_exception_trace! - ); - setCopyClicked(true); - setTimeout(() => setCopyClicked(false), 2000); - }} - className="flex w-fit cursor-pointer hover:bg-hover-light p-2 border-border border rounded" - > - Copy full trace - -
- ) : ( -
- Copied to clipboard - -
- )} -
-
- {indexAttemptToDisplayTraceFor.full_exception_trace} -
-
-
+ exceptionTrace={indexAttemptToDisplayTraceFor.full_exception_trace!} + /> )} + @@ -81,7 +51,7 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { Status New Doc Cnt Total Doc Cnt - Error Msg + Error Message @@ -124,9 +94,31 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { {indexAttempt.total_docs_indexed}
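+                  {/* Error column: when errors were recorded, link out to view them; otherwise show "-" on success or the error message on failure */}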
- - {indexAttempt.error_msg || "-"} - + {indexAttempt.error_count > 0 && ( + + + +  View Errors + + + )} + + {indexAttempt.status === "success" && ( + + {"-"} + + )} + + {indexAttempt.status === "failed" && + indexAttempt.error_msg && ( + + {indexAttempt.error_msg} + + )} + {indexAttempt.full_exception_trace && (
{ diff --git a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx index 83d6363f621..10460459e32 100644 --- a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx @@ -1,11 +1,11 @@ "use client"; import { Button } from "@tremor/react"; -import { CCPairFullInfo } from "./types"; +import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types"; import { usePopup } from "@/components/admin/connectors/Popup"; -import { disableConnector } from "@/lib/connector"; import { mutate } from "swr"; import { buildCCPairInfoUrl } from "./lib"; +import { setCCPairStatus } from "@/lib/ccPair"; export function ModifyStatusButtonCluster({ ccPair, @@ -17,13 +17,16 @@ export function ModifyStatusButtonCluster({ return ( <> {popup} - {ccPair.connector.disabled ? ( + {ccPair.status === ConnectorCredentialPairStatus.PAUSED ? ( ); diff --git a/web/src/app/admin/connector/[ccPairId]/lib.ts b/web/src/app/admin/connector/[ccPairId]/lib.ts index e83f3d406d0..c2d02b23d75 100644 --- a/web/src/app/admin/connector/[ccPairId]/lib.ts +++ b/web/src/app/admin/connector/[ccPairId]/lib.ts @@ -1,3 +1,13 @@ +import { ValidSources } from "@/lib/types"; + export function buildCCPairInfoUrl(ccPairId: string | number) { return `/api/manage/admin/cc-pair/${ccPairId}`; } + +export function buildSimilarCredentialInfoURL( + source_type: ValidSources, + get_editable: boolean = false +) { + const base = `/api/manage/admin/similar-credentials/${source_type}`; + return get_editable ? `${base}?get_editable=True` : base; +} diff --git a/web/src/app/admin/connector/[ccPairId]/page.tsx b/web/src/app/admin/connector/[ccPairId]/page.tsx index 7e613461bfc..f5da225a867 100644 --- a/web/src/app/admin/connector/[ccPairId]/page.tsx +++ b/web/src/app/admin/connector/[ccPairId]/page.tsx @@ -1,23 +1,29 @@ "use client"; -import { CCPairFullInfo } from "./types"; +import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types"; import { HealthCheckBanner } from "@/components/health/healthcheck"; import { CCPairStatus } from "@/components/Status"; import { BackButton } from "@/components/BackButton"; -import { Divider, Title } from "@tremor/react"; +import { Button, Divider, Title } from "@tremor/react"; import { IndexingAttemptsTable } from "./IndexingAttemptsTable"; -import { Text } from "@tremor/react"; -import { ConfigDisplay } from "./ConfigDisplay"; +import { AdvancedConfigDisplay, ConfigDisplay } from "./ConfigDisplay"; import { ModifyStatusButtonCluster } from "./ModifyStatusButtonCluster"; import { DeletionButton } from "./DeletionButton"; import { ErrorCallout } from "@/components/ErrorCallout"; import { ReIndexButton } from "./ReIndexButton"; import { isCurrentlyDeleting } from "@/lib/documentDeletion"; import { ValidSources } from "@/lib/types"; -import useSWR from "swr"; +import useSWR, { mutate } from "swr"; import { errorHandlingFetcher } from "@/lib/fetcher"; import { ThreeDotsLoader } from "@/components/Loading"; +import CredentialSection from "@/components/credentials/CredentialSection"; import { buildCCPairInfoUrl } from "./lib"; +import { SourceIcon } from "@/components/SourceIcon"; +import { credentialTemplates } from "@/lib/connectors/credentials"; +import { useEffect, useRef, useState } from "react"; +import { CheckmarkIcon, EditIcon, XIcon } from "@/components/icons/icons"; +import { usePopup } from "@/components/admin/connectors/Popup"; 
+import { updateConnectorCredentialPairName } from "@/lib/connector"; // since the uploaded files are cleaned up after some period of time // re-indexing will not work for the file connector. Also, it would not @@ -35,6 +41,43 @@ function Main({ ccPairId }: { ccPairId: number }) { { refreshInterval: 5000 } // 5 seconds ); + const [editableName, setEditableName] = useState(ccPair?.name || ""); + const [isEditing, setIsEditing] = useState(false); + const inputRef = useRef(null); + + const { popup, setPopup } = usePopup(); + useEffect(() => { + if (isEditing && inputRef.current) { + inputRef.current.focus(); + } + }, [isEditing]); + const handleNameChange = (e: React.ChangeEvent) => { + setEditableName(e.target.value); + }; + + const handleUpdateName = async () => { + try { + const response = await updateConnectorCredentialPairName( + ccPair?.id!, + editableName + ); + if (!response.ok) { + throw new Error(await response.text()); + } + mutate(buildCCPairInfoUrl(ccPairId)); + setIsEditing(false); + setPopup({ + message: "Connector name updated successfully", + type: "success", + }); + } catch (error) { + setPopup({ + message: `Failed to update connector name`, + type: "error", + }); + } + }; + if (isLoading) { return ; } @@ -49,7 +92,7 @@ function Main({ ccPairId }: { ccPairId: number }) { } const lastIndexAttempt = ccPair.index_attempts[0]; - const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt); + const isDeleting = ccPair.status === ConnectorCredentialPairStatus.DELETING; // figure out if we need to artificially deflate the number of docs indexed. // This is required since the total number of docs indexed by a CC Pair is @@ -61,72 +104,143 @@ function Main({ ccPairId }: { ccPairId: number }) { ? lastIndexAttempt.total_docs_indexed : ccPair.num_docs_indexed; + const refresh = () => { + mutate(buildCCPairInfoUrl(ccPairId)); + }; + + const startEditing = () => { + setEditableName(ccPair.name); + setIsEditing(true); + }; + + const resetEditing = () => { + setIsEditing(false); + setEditableName(ccPair.name); + }; + + const { + prune_freq: pruneFreq, + refresh_freq: refreshFreq, + indexing_start: indexingStart, + } = ccPair.connector; return ( <> + {popup}
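+      {/* Page header: source icon and inline-editable connector name (editing allowed only for users with edit access), plus re-index and status controls */}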
-

{ccPair.name}

- -
- +
+
-
+ {ccPair.is_editable_for_current_user && isEditing ? ( +
+ + + +
+ ) : ( +

+ ccPair.is_editable_for_current_user && startEditing() + } + className={`group flex ${ccPair.is_editable_for_current_user ? "cursor-pointer" : ""} text-3xl text-emphasis gap-x-2 items-center font-bold`} + > + {ccPair.name} + {ccPair.is_editable_for_current_user && ( + + )} +

+ )} + + {ccPair.is_editable_for_current_user && ( +
+ {!CONNECTOR_TYPES_THAT_CANT_REINDEX.includes( + ccPair.connector.source + ) && ( + + )} + {!isDeleting && } +
+ )} +
-
Total Documents Indexed:{" "} {totalDocsIndexed}
+ {!ccPair.is_editable_for_current_user && ( +
+ {ccPair.is_public + ? "Public connectors are not editable by curators." + : "This connector belongs to groups where you don't have curator permissions, so it's not editable."} +
+ )} + {credentialTemplates[ccPair.connector.source] && + ccPair.is_editable_for_current_user && ( + <> + - + Credentials + refresh()} + /> + + )} + + + {(pruneFreq || indexingStart || refreshFreq) && ( + + )} + {/* NOTE: no divider / title here for `ConfigDisplay` since it is optional and we need to render these conditionally.*/} -
Indexing Attempts - - {!CONNECTOR_TYPES_THAT_CANT_REINDEX.includes( - ccPair.connector.source - ) && ( - - )}
-
- - -
- Delete Connector - - Deleting the connector will also delete all associated documents. - - -
-
+
+
+ {ccPair.is_editable_for_current_user && ( -
+ )}
- - {/* TODO: add document search*/} ); } @@ -136,10 +250,6 @@ export default function Page({ params }: { params: { ccPairId: string } }) { return (
-
- -
-
); diff --git a/web/src/app/admin/connector/[ccPairId]/types.ts b/web/src/app/admin/connector/[ccPairId]/types.ts index ab4921180cf..1cc43311e21 100644 --- a/web/src/app/admin/connector/[ccPairId]/types.ts +++ b/web/src/app/admin/connector/[ccPairId]/types.ts @@ -1,16 +1,22 @@ -import { - Connector, - Credential, - DeletionAttemptSnapshot, - IndexAttemptSnapshot, -} from "@/lib/types"; +import { Connector } from "@/lib/connectors/connectors"; +import { Credential } from "@/lib/connectors/credentials"; +import { DeletionAttemptSnapshot, IndexAttemptSnapshot } from "@/lib/types"; + +export enum ConnectorCredentialPairStatus { + ACTIVE = "ACTIVE", + PAUSED = "PAUSED", + DELETING = "DELETING", +} export interface CCPairFullInfo { id: number; name: string; + status: ConnectorCredentialPairStatus; num_docs_indexed: number; connector: Connector; credential: Credential; index_attempts: IndexAttemptSnapshot[]; latest_deletion_attempt: DeletionAttemptSnapshot | null; + is_public: boolean; + is_editable_for_current_user: boolean; } diff --git a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx new file mode 100644 index 00000000000..dd8d19ca720 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx @@ -0,0 +1,617 @@ +"use client"; + +import * as Yup from "yup"; +import { TrashIcon } from "@/components/icons/icons"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import useSWR, { mutate } from "swr"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; + +import { Card, Divider, Title } from "@tremor/react"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { useFormContext } from "@/components/context/FormContext"; +import { getSourceDisplayName } from "@/lib/sources"; +import { SourceIcon } from "@/components/SourceIcon"; +import { useRef, useState, useEffect } from "react"; +import { submitConnector } from "@/components/admin/connectors/ConnectorForm"; +import { deleteCredential, linkCredential } from "@/lib/credential"; +import { submitFiles } from "./pages/utils/files"; +import { submitGoogleSite } from "./pages/utils/google_site"; +import AdvancedFormPage from "./pages/Advanced"; +import DynamicConnectionForm from "./pages/DynamicConnectorCreationForm"; +import CreateCredential from "@/components/credentials/actions/CreateCredential"; +import ModifyCredential from "@/components/credentials/actions/ModifyCredential"; +import { ValidSources } from "@/lib/types"; +import { Credential, credentialTemplates } from "@/lib/connectors/credentials"; +import { + ConnectionConfiguration, + connectorConfigs, +} from "@/lib/connectors/connectors"; +import { Modal } from "@/components/Modal"; +import { ArrowRight } from "@phosphor-icons/react"; +import { ArrowLeft } from "@phosphor-icons/react/dist/ssr"; +import { FiPlus } from "react-icons/fi"; +import GDriveMain from "./pages/gdrive/GoogleDrivePage"; +import { GmailMain } from "./pages/gmail/GmailPage"; +import { + useGmailCredentials, + useGoogleDriveCredentials, +} from "./pages/utils/hooks"; +import { Formik, FormikProps } from "formik"; +import { + IsPublicGroupSelector, + IsPublicGroupSelectorFormType, +} from "@/components/IsPublicGroupSelector"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import 
{ AdminBooleanFormField } from "@/components/credentials/CredentialFields"; + +export type AdvancedConfigFinal = { + pruneFreq: number | null; + refreshFreq: number | null; + indexingStart: Date | null; +}; + +export default function AddConnector({ + connector, +}: { + connector: ValidSources; +}) { + const [currentCredential, setCurrentCredential] = + useState | null>(null); + + const { data: credentials } = useSWR[]>( + buildSimilarCredentialInfoURL(connector), + errorHandlingFetcher, + { refreshInterval: 5000 } + ); + + const { data: editableCredentials } = useSWR[]>( + buildSimilarCredentialInfoURL(connector, true), + errorHandlingFetcher, + { refreshInterval: 5000 } + ); + const [selectedFiles, setSelectedFiles] = useState([]); + + const credentialTemplate = credentialTemplates[connector]; + + const { + setFormStep, + setAllowAdvanced, + setAlowCreate, + formStep, + nextFormStep, + prevFormStep, + } = useFormContext(); + + const { popup, setPopup } = usePopup(); + + const configuration: ConnectionConfiguration = connectorConfigs[connector]; + const [formValues, setFormValues] = useState< + Record & IsPublicGroupSelectorFormType + >({ + name: "", + groups: [], + is_public: false, + ...configuration.values.reduce( + (acc, field) => { + if (field.type === "list") { + acc[field.name] = field.default || []; + } else if (field.type === "checkbox") { + acc[field.name] = field.default || false; + } else if (field.default !== undefined) { + acc[field.name] = field.default; + } + return acc; + }, + {} as { [record: string]: any } + ), + }); + + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); + + // Default to 10 minutes unless otherwise specified + const defaultAdvancedSettings = { + refreshFreq: formValues.overrideDefaultFreq || 10, + pruneFreq: 30, + indexingStart: null as string | null, + }; + + const [advancedSettings, setAdvancedSettings] = useState( + defaultAdvancedSettings + ); + + const [createConnectorToggle, setCreateConnectorToggle] = useState(false); + const formRef = useRef>(null); + + const [isFormValid, setIsFormValid] = useState(false); + + const handleFormStatusChange = (isValid: boolean) => { + setIsFormValid(isValid || connector == "file"); + }; + + const { liveGDriveCredential } = useGoogleDriveCredentials(); + + const { liveGmailCredential } = useGmailCredentials(); + + const credentialActivated = + (connector === "google_drive" && liveGDriveCredential) || + (connector === "gmail" && liveGmailCredential) || + currentCredential; + + const noCredentials = credentialTemplate == null; + + if (noCredentials && 1 != formStep) { + setFormStep(Math.max(1, formStep)); + } + + if (!noCredentials && !credentialActivated && formStep != 0) { + setFormStep(Math.min(formStep, 0)); + } + + const resetAdvancedConfigs = (formikProps: FormikProps) => { + formikProps.resetForm({ values: defaultAdvancedSettings }); + setAdvancedSettings(defaultAdvancedSettings); + }; + + const convertStringToDateTime = (indexingStart: string | null) => { + return indexingStart ? 
new Date(indexingStart) : null; + }; + + const createConnector = async () => { + const { + name, + groups, + is_public: isPublic, + ...connector_specific_config + } = formValues; + const { pruneFreq, indexingStart, refreshFreq } = advancedSettings; + + // Apply transforms from connectors.ts configuration + const transformedConnectorSpecificConfig = Object.entries( + connector_specific_config + ).reduce( + (acc, [key, value]) => { + const matchingConfigValue = configuration.values.find( + (configValue) => configValue.name === key + ); + if ( + matchingConfigValue && + "transform" in matchingConfigValue && + matchingConfigValue.transform + ) { + acc[key] = matchingConfigValue.transform(value as string[]); + } else { + acc[key] = value; + } + return acc; + }, + {} as Record + ); + + const AdvancedConfig: AdvancedConfigFinal = { + pruneFreq: advancedSettings.pruneFreq * 60 * 60 * 24, + indexingStart: convertStringToDateTime(indexingStart), + refreshFreq: advancedSettings.refreshFreq * 60, + }; + + // google sites-specific handling + if (connector == "google_site") { + const response = await submitGoogleSite( + selectedFiles, + formValues?.base_url, + setPopup, + AdvancedConfig, + name + ); + if (response) { + setTimeout(() => { + window.open("/admin/indexing/status", "_self"); + }, 1000); + } + return; + } + + // file-specific handling + if (connector == "file" && selectedFiles.length > 0) { + const response = await submitFiles( + selectedFiles, + setPopup, + setSelectedFiles, + name, + AdvancedConfig, + isPublic, + groups + ); + if (response) { + setTimeout(() => { + window.open("/admin/indexing/status", "_self"); + }, 1000); + } + return; + } + + const { message, isSuccess, response } = await submitConnector( + { + connector_specific_config: transformedConnectorSpecificConfig, + input_type: connector == "web" ? "load_state" : "poll", // single case + name: name, + source: connector, + refresh_freq: refreshFreq * 60 || null, + prune_freq: pruneFreq * 60 * 60 * 24 || null, + indexing_start: convertStringToDateTime(indexingStart), + is_public: isPublic, + groups: groups, + }, + undefined, + credentialActivated ? false : true, + isPublic + ); + // If no credential + if (!credentialActivated) { + if (isSuccess) { + setPopup({ + message: "Connector created! Redirecting to connector home page", + type: "success", + }); + setTimeout(() => { + window.open("/admin/indexing/status", "_self"); + }, 1000); + } else { + setPopup({ message: message, type: "error" }); + } + } + + // Without credential + if (credentialActivated && isSuccess && response) { + const credential = + currentCredential || liveGDriveCredential || liveGmailCredential; + const linkCredentialResponse = await linkCredential( + response.id, + credential?.id!, + name, + isPublic, + groups + ); + if (linkCredentialResponse.ok) { + setPopup({ + message: "Connector created! Redirecting to connector home page", + type: "success", + }); + setTimeout(() => { + window.open("/admin/indexing/status", "_self"); + }, 1000); + } else { + const errorData = await linkCredentialResponse.json(); + setPopup({ + message: errorData.message, + type: "error", + }); + } + } else if (isSuccess) { + setPopup({ + message: + "Credential created succsfully! 
Redirecting to connector home page", + type: "success", + }); + } else { + setPopup({ message: message, type: "error" }); + } + }; + + const displayName = getSourceDisplayName(connector) || connector; + if (!credentials || !editableCredentials) { + return <>; + } + + const refresh = () => { + mutate(buildSimilarCredentialInfoURL(connector)); + }; + const onDeleteCredential = async (credential: Credential) => { + const response = await deleteCredential(credential.id, true); + if (response.ok) { + setPopup({ + message: "Credential deleted successfully!", + type: "success", + }); + } else { + const errorData = await response.json(); + setPopup({ + message: errorData.message, + type: "error", + }); + } + }; + + const onSwap = async (selectedCredential: Credential) => { + setCurrentCredential(selectedCredential); + setAlowCreate(true); + setPopup({ + message: "Swapped credential successfully!", + type: "success", + }); + refresh(); + }; + + const validationSchema = Yup.object().shape({ + name: Yup.string().required("Connector Name is required"), + ...configuration.values.reduce( + (acc, field) => { + let schema: any = + field.type === "list" + ? Yup.array().of(Yup.string()) + : field.type === "checkbox" + ? Yup.boolean() + : Yup.string(); + + if (!field.optional) { + schema = schema.required(`${field.label} is required`); + } + acc[field.name] = schema; + return acc; + }, + {} as Record + ), + }); + + const advancedValidationSchema = Yup.object().shape({ + indexingStart: Yup.string().nullable(), + pruneFreq: Yup.number().min(0, "Prune frequency must be non-negative"), + refreshFreq: Yup.number().min(0, "Refresh frequency must be non-negative"), + }); + + const isFormSubmittable = (values: any) => { + return ( + values.name.trim() !== "" && + Object.keys(values).every((key) => { + const field = configuration.values.find((f) => f.name === key); + return field?.optional || values[key] !== ""; + }) + ); + }; + + return ( +
+ {popup} +
+ +
+ + } + title={displayName} + /> + + {formStep == 0 && + (connector == "google_drive" ? ( + <> + + Select a credential + + +
+ +
+ + ) : connector == "gmail" ? ( + <> + + Select a credential + + +
+ +
+ + ) : ( + <> + + Select a credential + + {!createConnectorToggle && ( + + )} + + {!(connector == "google_drive") && createConnectorToggle && ( + setCreateConnectorToggle(false)} + > + <> + + Create a {getSourceDisplayName(connector)} credential + + setCreateConnectorToggle(false)} + /> + + + )} + +
+ +
+ + ))} + + {formStep == 1 && ( + <> + + { + // Can be utilized for logging purposes + }} + > + {(formikProps) => { + setFormValues(formikProps.values); + handleFormStatusChange( + formikProps.isValid && isFormSubmittable(formikProps.values) + ); + setAllowAdvanced( + formikProps.isValid && isFormSubmittable(formikProps.values) + ); + + return ( +
+ + {isPaidEnterpriseFeaturesEnabled && ( + <> + + + )} +
+ ); + }} +
+
+
+ {!noCredentials ? ( + + ) : ( +
+ )} + + + {!(connector == "file") && ( +
+ +
+ )} +
+ + )} + + {formStep === 2 && ( + <> + + {}} + > + {(formikProps) => { + setAdvancedSettings(formikProps.values); + + return ( + <> + +
+ +
+ + ); + }} +
+
+
+ + +
+ + )} +
+ ); +} diff --git a/web/src/app/admin/connectors/[connector]/ConnectorWrapper.tsx b/web/src/app/admin/connectors/[connector]/ConnectorWrapper.tsx new file mode 100644 index 00000000000..345ace085bc --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/ConnectorWrapper.tsx @@ -0,0 +1,37 @@ +"use client"; + +import { ValidSources } from "@/lib/types"; +import AddConnector from "./AddConnectorPage"; +import { FormProvider } from "@/components/context/FormContext"; +import Sidebar from "./Sidebar"; +import { HeaderTitle } from "@/components/header/HeaderTitle"; +import { Button } from "@tremor/react"; +import { isValidSource } from "@/lib/sources"; + +export default function ConnectorWrapper({ connector }: { connector: string }) { + return ( + +
+ +
+ {!isValidSource(connector) ? ( +
+ +

‘{connector}’ is not a valid Connector Type!

+
+ +
+ ) : ( + + )} +
+
+
+ ); +} diff --git a/web/src/app/admin/connectors/[connector]/Sidebar.tsx b/web/src/app/admin/connectors/[connector]/Sidebar.tsx new file mode 100644 index 00000000000..97275843e0c --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/Sidebar.tsx @@ -0,0 +1,116 @@ +import { useFormContext } from "@/components/context/FormContext"; +import { HeaderTitle } from "@/components/header/HeaderTitle"; + +import { BackIcon, SettingsIcon } from "@/components/icons/icons"; +import { Logo } from "@/components/Logo"; +import { SettingsContext } from "@/components/settings/SettingsProvider"; +import { credentialTemplates } from "@/lib/connectors/credentials"; +import Link from "next/link"; +import { useContext } from "react"; + +export default function Sidebar() { + const { formStep, setFormStep, connector, allowAdvanced, allowCreate } = + useFormContext(); + const combinedSettings = useContext(SettingsContext); + if (!combinedSettings) { + return null; + } + const enterpriseSettings = combinedSettings.enterpriseSettings; + const noCredential = credentialTemplates[connector] == null; + + const settingSteps = [ + ...(!noCredential ? ["Credential"] : []), + "Connector", + ...(connector == "file" ? [] : ["Advanced (optional)"]), + ]; + + return ( +
+
+
+
+
+ +
+ +
+ {enterpriseSettings && enterpriseSettings.application_name ? ( + {enterpriseSettings.application_name} + ) : ( +

+ EveAI +

+ )} +
+
+ +
+ + +

Admin Page

+ +
+ +
+
+
+ {connector != "file" && ( +
+ )} + {settingSteps.map((step, index) => { + const allowed = + (step == "Connector" && allowCreate) || + (step == "Advanced (optional)" && allowAdvanced) || + index <= formStep; + + return ( +
{ + if (allowed) { + setFormStep(index - (noCredential ? 1 : 0)); + } + }} + > +
+
+ {formStep === index && ( +
+ )} +
+
+
+ {step} +
+
+ ); + })} +
+
+
+
+
+
+ ); +} diff --git a/web/src/app/admin/connectors/google-drive/auth/callback/route.ts b/web/src/app/admin/connectors/[connector]/auth/callback/route.ts similarity index 54% rename from web/src/app/admin/connectors/google-drive/auth/callback/route.ts rename to web/src/app/admin/connectors/[connector]/auth/callback/route.ts index 3a82df64c8f..9d80e1b2fd2 100644 --- a/web/src/app/admin/connectors/google-drive/auth/callback/route.ts +++ b/web/src/app/admin/connectors/[connector]/auth/callback/route.ts @@ -2,13 +2,16 @@ import { getDomain } from "@/lib/redirectSS"; import { buildUrl } from "@/lib/utilsSS"; import { NextRequest, NextResponse } from "next/server"; import { cookies } from "next/headers"; -import { GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; +import { + GMAIL_AUTH_IS_ADMIN_COOKIE_NAME, + GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME, +} from "@/lib/constants"; import { processCookies } from "@/lib/userSS"; export const GET = async (request: NextRequest) => { - // Wrapper around the FastAPI endpoint /connectors/google-drive/callback, - // which adds back a redirect to the Google Drive admin page. - const url = new URL(buildUrl("/manage/connector/google-drive/callback")); + const connector = request.url.includes("gmail") ? "gmail" : "google-drive"; + const callbackEndpoint = `/manage/connector/${connector}/callback`; + const url = new URL(buildUrl(callbackEndpoint)); url.search = request.nextUrl.search; const response = await fetch(url.toString(), { @@ -19,20 +22,22 @@ export const GET = async (request: NextRequest) => { if (!response.ok) { console.log( - "Error in Google Drive callback:", + `Error in ${connector} callback:`, (await response.json()).detail ); return NextResponse.redirect(new URL("/auth/error", getDomain(request))); } - if ( - cookies() - .get(GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME) - ?.value?.toLowerCase() === "true" - ) { + const authCookieName = + connector === "gmail" + ? 
GMAIL_AUTH_IS_ADMIN_COOKIE_NAME + : GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME; + + if (cookies().get(authCookieName)?.value?.toLowerCase() === "true") { return NextResponse.redirect( - new URL("/admin/connectors/google-drive", getDomain(request)) + new URL(`/admin/connectors/${connector}`, getDomain(request)) ); } + return NextResponse.redirect(new URL("/user/connectors", getDomain(request))); }; diff --git a/web/src/app/admin/connectors/[connector]/page.tsx b/web/src/app/admin/connectors/[connector]/page.tsx new file mode 100644 index 00000000000..265d6922ebc --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/page.tsx @@ -0,0 +1,9 @@ +import ConnectorWrapper from "./ConnectorWrapper"; + +export default async function Page({ + params, +}: { + params: { connector: string }; +}) { + return ; +} diff --git a/web/src/app/admin/connectors/[connector]/pages/Advanced.tsx b/web/src/app/admin/connectors/[connector]/pages/Advanced.tsx new file mode 100644 index 00000000000..470ab8d2a77 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/Advanced.tsx @@ -0,0 +1,68 @@ +import React, { Dispatch, forwardRef, SetStateAction } from "react"; +import { Formik, Form, FormikProps } from "formik"; +import * as Yup from "yup"; +import NumberInput from "./ConnectorInput/NumberInput"; +import { TextFormField } from "@/components/admin/connectors/Field"; + +interface AdvancedFormPageProps { + formikProps: FormikProps<{ + indexingStart: string | null; + pruneFreq: number; + refreshFreq: number; + }>; +} + +const AdvancedFormPage = forwardRef, AdvancedFormPageProps>( + ({ formikProps }, ref) => { + const { indexingStart, refreshFreq, pruneFreq } = formikProps.values; + + return ( +
+

+ Advanced Configuration +

+ + +
+ +
+ +
+ +
+ +
+ +
+ +
+ ); + } +); + +AdvancedFormPage.displayName = "AdvancedFormPage"; +export default AdvancedFormPage; diff --git a/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput.tsx b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput.tsx new file mode 100644 index 00000000000..50af2dfff70 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/FileInput.tsx @@ -0,0 +1,37 @@ +import { FileUpload } from "@/components/admin/connectors/FileUpload"; +import CredentialSubText from "@/components/credentials/CredentialFields"; + +interface FileInputProps { + name: string; + label: string; + optional?: boolean; + description?: string; + selectedFiles: File[]; + setSelectedFiles: (files: File[]) => void; +} + +export default function FileInput({ + name, + label, + optional = false, + description, + selectedFiles, + setSelectedFiles, +}: FileInputProps) { + return ( + <> + + {description && {description}} + + + ); +} diff --git a/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/ListInput.tsx b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/ListInput.tsx new file mode 100644 index 00000000000..059d08d539d --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/ListInput.tsx @@ -0,0 +1,74 @@ +import CredentialSubText from "@/components/credentials/CredentialFields"; +import { TrashIcon } from "@/components/icons/icons"; +import { ListOption } from "@/lib/connectors/connectors"; +import { Field, FieldArray, useField } from "formik"; +import { FaPlus } from "react-icons/fa"; + +export default function ListInput({ + field, + onUpdate, +}: { + field: ListOption; + onUpdate?: (values: string[]) => void; +}) { + const [fieldProps, , helpers] = useField(field.name); + + return ( + + {({ push, remove }) => ( +
+ + {field.description && ( + {field.description} + )} + + {fieldProps.value.map((value: string, index: number) => ( +
+ + +
+ ))} + + +
+ )} +
+ ); +} diff --git a/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/NumberInput.tsx b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/NumberInput.tsx new file mode 100644 index 00000000000..5a9f5041b5d --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/NumberInput.tsx @@ -0,0 +1,42 @@ +import { SubLabel } from "@/components/admin/connectors/Field"; +import { Field } from "formik"; + +export default function NumberInput({ + label, + value, + optional, + description, + name, + showNeverIfZero, +}: { + value?: number; + label: string; + name: string; + optional?: boolean; + description?: string; + showNeverIfZero?: boolean; +}) { + return ( +
+ + {description && {description}} + + +
+ ); +} diff --git a/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/SelectInput.tsx b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/SelectInput.tsx new file mode 100644 index 00000000000..e01a02dc323 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/ConnectorInput/SelectInput.tsx @@ -0,0 +1,45 @@ +import CredentialSubText from "@/components/credentials/CredentialFields"; +import { ListOption, SelectOption } from "@/lib/connectors/connectors"; +import { Field } from "formik"; + +export default function SelectInput({ + field, + value, + onChange, +}: { + field: SelectOption; + value: any; + onChange?: (e: Event) => void; +}) { + return ( + <> + + {field.description && ( + {field.description} + )} + + + + {field.options?.map((option: any) => ( + + ))} + + + ); +} diff --git a/web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx b/web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx new file mode 100644 index 00000000000..507b976f9a8 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx @@ -0,0 +1,115 @@ +import React, { + ChangeEvent, + Dispatch, + FC, + SetStateAction, + useEffect, + useState, +} from "react"; +import { Formik, Form, Field, FieldArray, FormikProps } from "formik"; +import * as Yup from "yup"; +import { FaPlus } from "react-icons/fa"; +import { useUserGroups } from "@/lib/hooks"; +import { UserGroup, User, UserRole } from "@/lib/types"; +import { Divider } from "@tremor/react"; +import CredentialSubText, { + AdminBooleanFormField, +} from "@/components/credentials/CredentialFields"; +import { TrashIcon } from "@/components/icons/icons"; +import { FileUpload } from "@/components/admin/connectors/FileUpload"; +import { ConnectionConfiguration } from "@/lib/connectors/connectors"; +import { useFormContext } from "@/components/context/FormContext"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import { Text } from "@tremor/react"; +import { getCurrentUser } from "@/lib/user"; +import { FiUsers } from "react-icons/fi"; +import SelectInput from "./ConnectorInput/SelectInput"; +import NumberInput from "./ConnectorInput/NumberInput"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import ListInput from "./ConnectorInput/ListInput"; +import FileInput from "./ConnectorInput/FileInput"; + +export interface DynamicConnectionFormProps { + config: ConnectionConfiguration; + selectedFiles: File[]; + setSelectedFiles: Dispatch>; + values: any; +} + +const DynamicConnectionForm: FC = ({ + config, + selectedFiles, + setSelectedFiles, + values, +}) => { + return ( + <> +

{config.description}

+ + {config.subtext && ( + {config.subtext} + )} + + + + {config.values.map((field) => { + if (!field.hidden) { + return ( +
+ {field.type == "file" ? ( + + ) : field.type == "zip" ? ( + + ) : field.type === "list" ? ( + + ) : field.type === "select" ? ( + + ) : field.type === "number" ? ( + + ) : field.type === "checkbox" ? ( + + ) : ( + + )} +
+ ); + } + })} + + ); +}; + +export default DynamicConnectionForm; diff --git a/web/src/app/admin/connectors/[connector]/pages/formelements/NumberInput.tsx b/web/src/app/admin/connectors/[connector]/pages/formelements/NumberInput.tsx new file mode 100644 index 00000000000..5a9f5041b5d --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/formelements/NumberInput.tsx @@ -0,0 +1,42 @@ +import { SubLabel } from "@/components/admin/connectors/Field"; +import { Field } from "formik"; + +export default function NumberInput({ + label, + value, + optional, + description, + name, + showNeverIfZero, +}: { + value?: number; + label: string; + name: string; + optional?: boolean; + description?: string; + showNeverIfZero?: boolean; +}) { + return ( +
+ + {description && {description}} + + +
+ ); +} diff --git a/web/src/app/admin/connectors/google-drive/Credential.tsx b/web/src/app/admin/connectors/[connector]/pages/gdrive/Credential.tsx similarity index 86% rename from web/src/app/admin/connectors/google-drive/Credential.tsx rename to web/src/app/admin/connectors/[connector]/pages/gdrive/Credential.tsx index 849057e71e3..a320d466b40 100644 --- a/web/src/app/admin/connectors/google-drive/Credential.tsx +++ b/web/src/app/admin/connectors/[connector]/pages/gdrive/Credential.tsx @@ -4,11 +4,6 @@ import { useState } from "react"; import { useSWRConfig } from "swr"; import * as Yup from "yup"; import { useRouter } from "next/navigation"; -import { - Credential, - GoogleDriveCredentialJson, - GoogleDriveServiceAccountCredentialJson, -} from "@/lib/types"; import { adminDeleteCredential } from "@/lib/credential"; import { setupGoogleDriveOAuth } from "@/lib/googleDrive"; import { GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; @@ -16,10 +11,15 @@ import Cookies from "js-cookie"; import { TextFormField } from "@/components/admin/connectors/Field"; import { Form, Formik } from "formik"; import { Card } from "@tremor/react"; +import { + Credential, + GoogleDriveCredentialJson, + GoogleDriveServiceAccountCredentialJson, +} from "@/lib/connectors/credentials"; type GoogleDriveCredentialJsonTypes = "authorized_user" | "service_account"; -const DriveJsonUpload = ({ +export const DriveJsonUpload = ({ setPopup, }: { setPopup: (popupSpec: PopupSpec | null) => void; @@ -33,8 +33,8 @@ const DriveJsonUpload = ({ <> void; appCredentialData?: { client_id: string }; serviceAccountCredentialData?: { service_account_email: string }; + isAdmin: boolean; } export const DriveJsonUploadSection = ({ setPopup, appCredentialData, serviceAccountCredentialData, + isAdmin, }: DriveJsonUploadSectionProps) => { const { mutate } = useSWRConfig(); @@ -165,38 +167,48 @@ export const DriveJsonUploadSection = ({ {serviceAccountCredentialData.service_account_email}

-
- If you want to update these credentials, delete the existing - credentials through the button below, and then upload a new - credentials JSON. -
- + {isAdmin ? ( + <> +
+ If you want to update these credentials, delete the existing + credentials through the button below, and then upload a new + credentials JSON. +
+ + + ) : ( + <> +
+ To change these credentials, please contact an administrator. +
+ + )}
); } @@ -242,6 +254,17 @@ export const DriveJsonUploadSection = ({ ); } + if (!isAdmin) { + return ( +
+

+ Curators are unable to set up the Google Drive credentials. To add a + Google Drive connector, please contact an administrator. +

+
+ ); + } + return (

diff --git a/web/src/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage.tsx b/web/src/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage.tsx new file mode 100644 index 00000000000..4494e4b22ee --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/gdrive/GoogleDrivePage.tsx @@ -0,0 +1,155 @@ +"use client"; + +import React from "react"; +import { useState, useEffect } from "react"; +import useSWR from "swr"; +import { FetchError, errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { LoadingAnimation } from "@/components/Loading"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorIndexingStatus } from "@/lib/types"; +import { getCurrentUser } from "@/lib/user"; +import { User, UserRole } from "@/lib/types"; +import { usePublicCredentials } from "@/lib/hooks"; +import { Title } from "@tremor/react"; +import { DriveJsonUploadSection, DriveOAuthSection } from "./Credential"; +import { + Credential, + GoogleDriveCredentialJson, + GoogleDriveServiceAccountCredentialJson, +} from "@/lib/connectors/credentials"; +import { GoogleDriveConfig } from "@/lib/connectors/connectors"; +import { useUser } from "@/components/user/UserProvider"; +import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; + +const GDriveMain = ({}: {}) => { + const { isLoadingUser, isAdmin } = useUser(); + + const { + data: appCredentialData, + isLoading: isAppCredentialLoading, + error: isAppCredentialError, + } = useSWR<{ client_id: string }, FetchError>( + "/api/manage/admin/connector/google-drive/app-credential", + errorHandlingFetcher + ); + + const { + data: serviceAccountKeyData, + isLoading: isServiceAccountKeyLoading, + error: isServiceAccountKeyError, + } = useSWR<{ service_account_email: string }, FetchError>( + "/api/manage/admin/connector/google-drive/service-account-key", + errorHandlingFetcher + ); + + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useConnectorCredentialIndexingStatus(); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + const { popup, setPopup } = usePopup(); + + const appCredentialSuccessfullyFetched = + appCredentialData || + (isAppCredentialError && isAppCredentialError.status === 404); + const serviceAccountKeySuccessfullyFetched = + serviceAccountKeyData || + (isServiceAccountKeyError && isServiceAccountKeyError.status === 404); + + if (isLoadingUser) { + return <>; + } + + if ( + (!appCredentialSuccessfullyFetched && isAppCredentialLoading) || + (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) || + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ( +

+ +
+ ); + } + + if (credentialsError || !credentialsData) { + return ; + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ; + } + + if ( + !appCredentialSuccessfullyFetched || + !serviceAccountKeySuccessfullyFetched + ) { + return ( + + ); + } + + const googleDrivePublicCredential: + | Credential + | undefined = credentialsData.find( + (credential) => + credential.credential_json?.google_drive_tokens && credential.admin_public + ); + const googleDriveServiceAccountCredential: + | Credential + | undefined = credentialsData.find( + (credential) => credential.credential_json?.google_drive_service_account_key + ); + const googleDriveConnectorIndexingStatuses: ConnectorIndexingStatus< + GoogleDriveConfig, + GoogleDriveCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "google_drive" + ); + + return ( + <> + {popup} + + Step 1: Provide your Credentials + + + + {isAdmin && ( + <> + + Step 2: Authenticate with Danswer + + 0} + /> + + )} + + ); +}; + +export default GDriveMain; diff --git a/web/src/app/admin/connectors/gmail/Credential.tsx b/web/src/app/admin/connectors/[connector]/pages/gmail/Credential.tsx similarity index 86% rename from web/src/app/admin/connectors/gmail/Credential.tsx rename to web/src/app/admin/connectors/[connector]/pages/gmail/Credential.tsx index 68f5bba2d20..8b456884f1a 100644 --- a/web/src/app/admin/connectors/gmail/Credential.tsx +++ b/web/src/app/admin/connectors/[connector]/pages/gmail/Credential.tsx @@ -4,11 +4,6 @@ import { useState } from "react"; import { useSWRConfig } from "swr"; import * as Yup from "yup"; import { useRouter } from "next/navigation"; -import { - Credential, - GmailCredentialJson, - GmailServiceAccountCredentialJson, -} from "@/lib/types"; import { adminDeleteCredential } from "@/lib/credential"; import { setupGmailOAuth } from "@/lib/gmail"; import { GMAIL_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; @@ -16,6 +11,11 @@ import Cookies from "js-cookie"; import { TextFormField } from "@/components/admin/connectors/Field"; import { Form, Formik } from "formik"; import { Card } from "@tremor/react"; +import { + Credential, + GmailCredentialJson, + GmailServiceAccountCredentialJson, +} from "@/lib/connectors/credentials"; type GmailCredentialJsonTypes = "authorized_user" | "service_account"; @@ -33,8 +33,8 @@ const DriveJsonUpload = ({ <> void; appCredentialData?: { client_id: string }; serviceAccountCredentialData?: { service_account_email: string }; + isAdmin: boolean; } export const GmailJsonUploadSection = ({ setPopup, appCredentialData, serviceAccountCredentialData, + isAdmin, }: DriveJsonUploadSectionProps) => { const { mutate } = useSWRConfig(); @@ -163,36 +165,48 @@ export const GmailJsonUploadSection = ({ {serviceAccountCredentialData.service_account_email}

-
- If you want to update these credentials, delete the existing - credentials through the button below, and then upload a new - credentials JSON. -
- + {isAdmin ? ( + <> +
+ If you want to update these credentials, delete the existing + credentials through the button below, and then upload a new + credentials JSON. +
+ + + ) : ( + <> +
+ To change these credentials, please contact an administrator. +
+ + )}
); } @@ -238,6 +252,17 @@ export const GmailJsonUploadSection = ({ ); } + if (!isAdmin) { + return ( +
+

+ Curators are unable to set up the Gmail credentials. To add a Gmail + connector, please contact an administrator. +

+
+ ); + } + return (

diff --git a/web/src/app/admin/connectors/[connector]/pages/gmail/GmailPage.tsx b/web/src/app/admin/connectors/[connector]/pages/gmail/GmailPage.tsx new file mode 100644 index 00000000000..5f52eb31013 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/gmail/GmailPage.tsx @@ -0,0 +1,159 @@ +"use client"; + +import useSWR from "swr"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorIndexingStatus } from "@/lib/types"; +import { getCurrentUser } from "@/lib/user"; +import { User, UserRole } from "@/lib/types"; +import { + Credential, + GmailCredentialJson, + GmailServiceAccountCredentialJson, +} from "@/lib/connectors/credentials"; +import { GmailOAuthSection, GmailJsonUploadSection } from "./Credential"; +import { usePublicCredentials } from "@/lib/hooks"; +import { Title } from "@tremor/react"; +import { GmailConfig } from "@/lib/connectors/connectors"; +import { useState, useEffect } from "react"; +import { useUser } from "@/components/user/UserProvider"; +import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; + +export const GmailMain = () => { + const { isLoadingUser, isAdmin } = useUser(); + + const { + data: appCredentialData, + isLoading: isAppCredentialLoading, + error: isAppCredentialError, + } = useSWR<{ client_id: string }>( + "/api/manage/admin/connector/gmail/app-credential", + errorHandlingFetcher + ); + const { + data: serviceAccountKeyData, + isLoading: isServiceAccountKeyLoading, + error: isServiceAccountKeyError, + } = useSWR<{ service_account_email: string }>( + "/api/manage/admin/connector/gmail/service-account-key", + errorHandlingFetcher + ); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useConnectorCredentialIndexingStatus(); + + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + const { popup, setPopup } = usePopup(); + + const appCredentialSuccessfullyFetched = + appCredentialData || + (isAppCredentialError && isAppCredentialError.status === 404); + const serviceAccountKeySuccessfullyFetched = + serviceAccountKeyData || + (isServiceAccountKeyError && isServiceAccountKeyError.status === 404); + + if (isLoadingUser) { + return <>; + } + + if ( + (!appCredentialSuccessfullyFetched && isAppCredentialLoading) || + (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) || + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ( +

+ +
+ ); + } + + if (credentialsError || !credentialsData) { + return ( +
+
Failed to load credentials.
+
+ ); + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( +
+
Failed to load connectors.
+
+ ); + } + + if ( + !appCredentialSuccessfullyFetched || + !serviceAccountKeySuccessfullyFetched + ) { + return ( +
+
+ Error loading Gmail app credentials. Contact an administrator. +
+
+ ); + } + + const gmailPublicCredential: Credential | undefined = + credentialsData.find( + (credential) => + credential.credential_json?.gmail_tokens && credential.admin_public + ); + const gmailServiceAccountCredential: + | Credential + | undefined = credentialsData.find( + (credential) => credential.credential_json?.gmail_service_account_key + ); + const gmailConnectorIndexingStatuses: ConnectorIndexingStatus< + GmailConfig, + GmailCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "gmail" + ); + + return ( + <> + {popup} + + Step 1: Provide your Credentials + + + + {isAdmin && ( + <> + + Step 2: Authenticate with Danswer + + 0} + /> + + )} + + ); +}; diff --git a/web/src/app/admin/connectors/[connector]/pages/utils/files.ts b/web/src/app/admin/connectors/[connector]/pages/utils/files.ts new file mode 100644 index 00000000000..d847efe89d1 --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/utils/files.ts @@ -0,0 +1,112 @@ +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { createConnector, runConnector } from "@/lib/connector"; +import { createCredential, linkCredential } from "@/lib/credential"; +import { FileConfig } from "@/lib/connectors/connectors"; +import { AdvancedConfigFinal } from "../../AddConnectorPage"; + +export const submitFiles = async ( + selectedFiles: File[], + setPopup: (popup: PopupSpec) => void, + setSelectedFiles: (files: File[]) => void, + name: string, + advancedConfig: AdvancedConfigFinal, + isPublic: boolean, + groups?: number[] +) => { + const formData = new FormData(); + + selectedFiles.forEach((file) => { + formData.append("files", file); + }); + + const response = await fetch("/api/manage/admin/connector/file/upload", { + method: "POST", + body: formData, + }); + const responseJson = await response.json(); + if (!response.ok) { + setPopup({ + message: `Unable to upload files - ${responseJson.detail}`, + type: "error", + }); + return; + } + + const filePaths = responseJson.file_paths as string[]; + + const [connectorErrorMsg, connector] = await createConnector({ + name: "FileConnector-" + Date.now(), + source: "file", + input_type: "load_state", + connector_specific_config: { + file_locations: filePaths, + }, + refresh_freq: null, + prune_freq: null, + indexing_start: null, + is_public: isPublic, + groups: groups, + }); + if (connectorErrorMsg || !connector) { + setPopup({ + message: `Unable to create connector - ${connectorErrorMsg}`, + type: "error", + }); + return; + } + + // Since there is no "real" credential associated with a file connector + // we create a dummy one here so that we can associate the CC Pair with a + // user. This is needed since the user for a CC Pair is found via the credential + // associated with it. 
+ const createCredentialResponse = await createCredential({ + credential_json: {}, + admin_public: true, + source: "file", + curator_public: isPublic, + groups: groups, + name, + }); + if (!createCredentialResponse.ok) { + const errorMsg = await createCredentialResponse.text(); + setPopup({ + message: `Error creating credential for CC Pair - ${errorMsg}`, + type: "error", + }); + return; + false; + } + const credentialId = (await createCredentialResponse.json()).id; + + const credentialResponse = await linkCredential( + connector.id, + credentialId, + name, + isPublic, + groups + ); + if (!credentialResponse.ok) { + const credentialResponseJson = await credentialResponse.json(); + setPopup({ + message: `Unable to link connector to credential - ${credentialResponseJson.detail}`, + type: "error", + }); + return false; + } + + const runConnectorErrorMsg = await runConnector(connector.id, [0]); + if (runConnectorErrorMsg) { + setPopup({ + message: `Unable to run connector - ${runConnectorErrorMsg}`, + type: "error", + }); + return false; + } + + setSelectedFiles([]); + setPopup({ + type: "success", + message: "Successfully uploaded files!", + }); + return true; +}; diff --git a/web/src/app/admin/connectors/[connector]/pages/utils/google_site.ts b/web/src/app/admin/connectors/[connector]/pages/utils/google_site.ts new file mode 100644 index 00000000000..f1689e8fcdf --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/utils/google_site.ts @@ -0,0 +1,87 @@ +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { createConnector, runConnector } from "@/lib/connector"; +import { linkCredential } from "@/lib/credential"; +import { GoogleSitesConfig } from "@/lib/connectors/connectors"; +import { AdvancedConfigFinal } from "../../AddConnectorPage"; + +export const submitGoogleSite = async ( + selectedFiles: File[], + base_url: any, + setPopup: (popup: PopupSpec) => void, + advancedConfig: AdvancedConfigFinal, + name?: string +) => { + const uploadCreateAndTriggerConnector = async () => { + const formData = new FormData(); + + selectedFiles.forEach((file) => { + formData.append("files", file); + }); + + const response = await fetch("/api/manage/admin/connector/file/upload", { + method: "POST", + body: formData, + }); + const responseJson = await response.json(); + if (!response.ok) { + setPopup({ + message: `Unable to upload files - ${responseJson.detail}`, + type: "error", + }); + return false; + } + + const filePaths = responseJson.file_paths as string[]; + const [connectorErrorMsg, connector] = + await createConnector({ + name: name ? 
name : `GoogleSitesConnector-${base_url}`, + source: "google_sites", + input_type: "load_state", + connector_specific_config: { + base_url: base_url, + zip_path: filePaths[0], + }, + refresh_freq: advancedConfig.refreshFreq, + prune_freq: advancedConfig.pruneFreq, + indexing_start: advancedConfig.indexingStart, + }); + if (connectorErrorMsg || !connector) { + setPopup({ + message: `Unable to create connector - ${connectorErrorMsg}`, + type: "error", + }); + return false; + } + + const credentialResponse = await linkCredential(connector.id, 0, base_url); + if (!credentialResponse.ok) { + const credentialResponseJson = await credentialResponse.json(); + setPopup({ + message: `Unable to link connector to credential - ${credentialResponseJson.detail}`, + type: "error", + }); + return false; + } + + const runConnectorErrorMsg = await runConnector(connector.id, [0]); + if (runConnectorErrorMsg) { + setPopup({ + message: `Unable to run connector - ${runConnectorErrorMsg}`, + type: "error", + }); + return false; + } + setPopup({ + type: "success", + message: "Successfully created Google Site connector!", + }); + return true; + }; + + try { + const response = await uploadCreateAndTriggerConnector(); + return response; + } catch (e) { + return false; + } +}; diff --git a/web/src/app/admin/connectors/[connector]/pages/utils/hooks.ts b/web/src/app/admin/connectors/[connector]/pages/utils/hooks.ts new file mode 100644 index 00000000000..d3a48e3a26b --- /dev/null +++ b/web/src/app/admin/connectors/[connector]/pages/utils/hooks.ts @@ -0,0 +1,65 @@ +import { GmailConfig } from "@/lib/connectors/connectors"; + +export const gmailConnectorNameBuilder = (values: GmailConfig) => + "GmailConnector"; + +import { usePublicCredentials } from "@/lib/hooks"; +import { + Credential, + GmailCredentialJson, + GmailServiceAccountCredentialJson, + GoogleDriveCredentialJson, + GoogleDriveServiceAccountCredentialJson, +} from "@/lib/connectors/credentials"; + +export const useGmailCredentials = () => { + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + const gmailPublicCredential: Credential | undefined = + credentialsData?.find( + (credential) => + credential.credential_json?.gmail_tokens && credential.admin_public + ); + + const gmailServiceAccountCredential: + | Credential + | undefined = credentialsData?.find( + (credential) => credential.credential_json?.gmail_service_account_key + ); + + const liveGmailCredential = + gmailPublicCredential || gmailServiceAccountCredential; + + return { + liveGmailCredential, + }; +}; + +export const useGoogleDriveCredentials = () => { + const { data: credentialsData } = usePublicCredentials(); + + const googleDrivePublicCredential: + | Credential + | undefined = credentialsData?.find( + (credential) => + credential.credential_json?.google_drive_tokens && credential.admin_public + ); + + const googleDriveServiceAccountCredential: + | Credential + | undefined = credentialsData?.find( + (credential) => credential.credential_json?.google_drive_service_account_key + ); + + const liveGDriveCredential = + googleDrivePublicCredential || googleDriveServiceAccountCredential; + + return { + liveGDriveCredential, + }; +}; diff --git a/web/src/app/admin/connectors/axero/page.tsx b/web/src/app/admin/connectors/axero/page.tsx deleted file mode 100644 index 6d4a5af8bcd..00000000000 --- a/web/src/app/admin/connectors/axero/page.tsx +++ /dev/null @@ -1,260 +0,0 @@ -"use client"; - -import * as Yup 
from "yup"; -import { AxeroIcon, TrashIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - AxeroConfig, - AxeroCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Button, Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const axeroConnectorIndexingStatuses: ConnectorIndexingStatus< - AxeroConfig, - AxeroCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "axero" - ); - const axeroCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.axero_api_token - ); - - return ( - <> - - Step 1: Provide Axero API Key - - {axeroCredential ? ( - <> -
- Existing Axero API Key: - - {axeroCredential.credential_json.axero_api_token} - - -
- - ) : ( - <> -

- To use the Axero connector, first follow the guide{" "} - - here - {" "} - to generate an API Key. -

- - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - base_url: Yup.string().required( - "Please enter the base URL of your Axero instance" - ), - axero_api_token: Yup.string().required( - "Please enter your Axero API Token" - ), - })} - initialValues={{ - base_url: "", - axero_api_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which spaces do you want to connect? - - - {axeroConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest Articles, Blogs, Wikis and{" "} - Forums once per day. - -
- - connectorIndexingStatuses={axeroConnectorIndexingStatuses} - liveCredential={axeroCredential} - getCredential={(credential) => - credential.credential_json.axero_api_token - } - specialColumns={[ - { - header: "Space", - key: "spaces", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return connectorConfig.spaces && - connectorConfig.spaces.length > 0 - ? connectorConfig.spaces.join(", ") - : ""; - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (axeroCredential) { - await linkCredential(connectorId, axeroCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {axeroCredential ? ( - -

Configure an Axero Connector

- - nameBuilder={(values) => - values.spaces - ? `AxeroConnector-${values.spaces.join("_")}` - : `AxeroConnector` - } - source="axero" - inputType="poll" - formBodyBuilder={(values) => { - return ( - <> - - {TextArrayFieldBuilder({ - name: "spaces", - label: "Space IDs:", - subtext: ` - Specify zero or more Spaces to index (by the Space IDs). If no Space IDs - are specified, all Spaces will be indexed.`, - })(values)} - - ); - }} - validationSchema={Yup.object().shape({ - spaces: Yup.array() - .of(Yup.string().required("Space Ids cannot be empty")) - .required(), - })} - initialValues={{ - spaces: [], - }} - refreshFreq={60 * 60 * 24} // 1 day - credentialId={axeroCredential.id} - /> -
- ) : ( - - Please provide your Axero API Token in Step 1 first! Once done with - that, you can then specify which spaces you want to connect. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Axero" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx deleted file mode 100644 index dbf8bd367b1..00000000000 --- a/web/src/app/admin/connectors/bookstack/page.tsx +++ /dev/null @@ -1,261 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { BookstackIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - BookstackCredentialJson, - BookstackConfig, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const bookstackConnectorIndexingStatuses: ConnectorIndexingStatus< - BookstackConfig, - BookstackCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "bookstack" - ); - const bookstackCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.bookstack_api_token_id - ); - - return ( - <> - {popup} - - Step 1: Provide your API details - - - {bookstackCredential ? ( - <> -
- Existing API Token: - - {bookstackCredential.credential_json?.bookstack_api_token_id} - - -
- - ) : ( - <> - - To get started you'll need API token details for your BookStack - instance. You can get these by editing your (or another) user - account in BookStack and creating a token via the 'API - Tokens' section at the bottom. Your user account will require - to be assigned a BookStack role which has the 'Access system - API' system permission assigned. - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - bookstack_base_url: Yup.string().required( - "Please enter the base URL for your BookStack instance" - ), - bookstack_api_token_id: Yup.string().required( - "Please enter your BookStack API token ID" - ), - bookstack_api_token_secret: Yup.string().required( - "Please enter your BookStack API token secret" - ), - })} - initialValues={{ - bookstack_base_url: "", - bookstack_api_token_id: "", - bookstack_api_token_secret: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> - - - )} - - {bookstackConnectorIndexingStatuses.length > 0 && ( - <> - - BookStack indexing status - - - The latest page, chapter, book and shelf changes are fetched every - 10 minutes. - -
- - connectorIndexingStatuses={bookstackConnectorIndexingStatuses} - liveCredential={bookstackCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.bookstack_api_token_id}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (bookstackCredential) { - await linkCredential(connectorId, bookstackCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {bookstackCredential && - bookstackConnectorIndexingStatuses.length === 0 && ( - <> - -

Create Connection

- - Press connect below to start the connection to your BookStack - instance. - - - nameBuilder={(values) => `BookStackConnector`} - ccPairNameBuilder={(values) => `BookStackConnector`} - source="bookstack" - inputType="poll" - formBody={<>} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={bookstackCredential.id} - /> -
- - )} - - {!bookstackCredential && ( - <> - - Please provide your API details in Step 1 first! Once done with - that, you'll be able to start the connection then see indexing - status. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Bookstack" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/clickup/page.tsx b/web/src/app/admin/connectors/clickup/page.tsx deleted file mode 100644 index 6a868bac743..00000000000 --- a/web/src/app/admin/connectors/clickup/page.tsx +++ /dev/null @@ -1,343 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, ClickupIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - ClickupConfig, - ClickupCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - BooleanFormField, - SelectorFormField, - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Title, Text, Card, Divider } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { popup, setPopup } = usePopup(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: isConnectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: isCredentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { - return
Failed to load connectors
; - } - - if (isCredentialsError || !credentialsData) { - return
Failed to load credentials
; - } - - const clickupConnectorIndexingStatuses: ConnectorIndexingStatus< - ClickupConfig, - ClickupCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "clickup" - ); - - const clickupCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.clickup_api_token - ); - - return ( - <> - {popup} - - Step 1: Provide Credentials - - - {clickupCredential ? ( - <> -
- Existing Clickup API Token: - - {clickupCredential.credential_json.clickup_api_token} - - -
- - ) : ( - <> - - To use the Clickup connector, you must first provide the API token - and Team ID corresponding to your Clickup setup. See setup guide{" "} - - here - {" "} - for more detail. - - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - clickup_api_token: Yup.string().required( - "Please enter your Clickup API token" - ), - clickup_team_id: Yup.string().required( - "Please enter your Team ID" - ), - })} - initialValues={{ - clickup_api_token: "", - clickup_team_id: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Do you want to search particular space(s), folder(s), list(s), - or entire workspace? - - - {clickupConnectorIndexingStatuses.length > 0 && ( - <> - - We index the latest articles from either the entire workspace, or - specified space(s), folder(s), list(s) listed below regularly. - -
- - connectorIndexingStatuses={clickupConnectorIndexingStatuses} - liveCredential={clickupCredential} - getCredential={(credential) => - credential.credential_json.clickup_api_token - } - specialColumns={[ - { - header: "Connector Type", - key: "connector_type", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .connector_type, - }, - { - header: "ID(s)", - key: "connector_ids", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .connector_ids && - ccPairStatus.connector.connector_specific_config - .connector_ids.length > 0 - ? ccPairStatus.connector.connector_specific_config.connector_ids.join( - ", " - ) - : "", - }, - { - header: "Retrieve Task Comments?", - key: "retrieve_task_comments", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .retrieve_task_comments - ? "Yes" - : "No", - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (clickupCredential) { - await linkCredential(connectorId, clickupCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {clickupCredential ? ( - -

Connect to a New Workspace

- - nameBuilder={(values) => - values.connector_ids - ? `ClickupConnector-${ - values.connector_type - }-${values.connector_ids.join("_")}` - : `ClickupConnector-${values.connector_type}` - } - source="clickup" - inputType="poll" - formBody={ - <> - - - } - formBodyBuilder={(values) => { - return ( - <> - - {TextArrayFieldBuilder({ - name: "connector_ids", - label: "ID(s):", - subtext: "Specify 0 or more id(s) to index from.", - })(values)} - - - ); - }} - validationSchema={Yup.object().shape({ - connector_type: Yup.string() - .oneOf(["workspace", "space", "folder", "list"]) - .required("Please select the connector_type to index"), - connector_ids: Yup.array() - .of(Yup.string().required("ID(s) must be strings")) - .test( - "idsRequired", - "At least 1 ID is required if space, folder or list is selected", - function (value) { - if (this.parent.connector_type === "workspace") return true; - else if (value !== undefined && value.length > 0) - return true; - setPopup({ - type: "error", - message: `Add at least one ${this.parent.connector_type} ID`, - }); - return false; - } - ), - retrieve_task_comments: Yup.boolean().required(), - })} - initialValues={{ - connector_type: "workspace", - connector_ids: [], - retrieve_task_comments: true, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={clickupCredential.id} - /> -
- ) : ( - - Please provide your Clickup API token and Team ID in Step 1 first! - Once done with that, you can then specify whether you want to make the - entire workspace, or specified space(s), folder(s), list(s) - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Clickup" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/confluence/page.tsx b/web/src/app/admin/connectors/confluence/page.tsx deleted file mode 100644 index 981d63e9513..00000000000 --- a/web/src/app/admin/connectors/confluence/page.tsx +++ /dev/null @@ -1,342 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { ConfluenceIcon, TrashIcon } from "@/components/icons/icons"; -import { - BooleanFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - ConfluenceCredentialJson, - ConfluenceConfig, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const extractSpaceFromCloudUrl = (wikiUrl: string): string => { - const parsedUrl = new URL(wikiUrl); - const space = parsedUrl.pathname.split("/")[3]; - return space; -}; - -const extractSpaceFromDataCenterUrl = (wikiUrl: string): string => { - const DISPLAY = "/display/"; - - const parsedUrl = new URL(wikiUrl); - const spaceSpecificSection = parsedUrl.pathname - .split(DISPLAY) - .slice(1) - .join(DISPLAY); - const space = spaceSpecificSection.split("/")[0]; - return space; -}; - -// Copied from the `extract_confluence_keys_from_url` function -const extractSpaceFromUrl = (wikiUrl: string): string | null => { - try { - if ( - wikiUrl.includes(".atlassian.net/wiki/spaces/") || - wikiUrl.includes(".jira.com/wiki/spaces/") - ) { - return extractSpaceFromCloudUrl(wikiUrl); - } - return extractSpaceFromDataCenterUrl(wikiUrl); - } catch (e) { - console.log("Failed to extract space from url", e); - return null; - } -}; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const confluenceConnectorIndexingStatuses: ConnectorIndexingStatus< - ConfluenceConfig, - ConfluenceCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "confluence" - ); - const confluenceCredential: Credential | undefined = - credentialsData.find( - (credential) => 
credential.credential_json?.confluence_access_token - ); - - return ( - <> - {popup} - - Step 1: Provide your access token - - - {confluenceCredential ? ( - <> -
- {/*
-

Existing Username:

-

- {confluenceCredential.credential_json?.confluence_username} -

{" "} -
*/} -

Existing Access Token:

-

- {confluenceCredential.credential_json?.confluence_access_token} -

- -
- - ) : ( - <> - - To use the Confluence connector, first follow the guide{" "} - - here - {" "} - to generate an Access Token. - - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - confluence_username: Yup.string().required( - "Please enter your username on Confluence" - ), - confluence_access_token: Yup.string().required( - "Please enter your Confluence access token" - ), - })} - initialValues={{ - confluence_username: "", - confluence_access_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - -

- Step 2: Which spaces do you want to make searchable? -

- {confluenceCredential ? ( - <> -

- Specify any link to a Confluence page below and click - "Index" to Index. Based on the provided link, we will - index either the entire page and its subpages OR the entire space. - For example, entering{" "} - - https://danswer.atlassian.net/wiki/spaces/Engineering/overview - {" "} - and clicking the Index button will index the whole{" "} - Engineering Confluence space, but entering - https://danswer.atlassian.net/wiki/spaces/Engineering/pages/164331/example+page - will index that page's children (and optionally, itself). Use - the checkbox below to determine whether or not to index the parent - page in addition to its children. -

- - {confluenceConnectorIndexingStatuses.length > 0 && ( - <> -

- We pull the latest pages and comments from each space listed - below every 10 minutes. -

-
- - connectorIndexingStatuses={ - confluenceConnectorIndexingStatuses - } - liveCredential={confluenceCredential} - getCredential={(credential) => { - return ( -
-

- {credential.credential_json.confluence_access_token} -

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (confluenceCredential) { - await linkCredential( - connectorId, - confluenceCredential.id - ); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Url", - key: "url", - getValue: (ccPairStatus) => ( - - { - ccPairStatus.connector.connector_specific_config - .wiki_page_url - } - - ), - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - - )} - -

Add a New Space or Page

- - nameBuilder={(values) => - `ConfluenceConnector-${values.wiki_page_url}` - } - ccPairNameBuilder={(values) => - extractSpaceFromUrl(values.wiki_page_url) - } - source="confluence" - inputType="poll" - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - wiki_page_url: Yup.string().required( - "Please enter any link to a Confluence space or Page e.g. https://danswer.atlassian.net/wiki/spaces/Engineering/overview" - ), - index_origin: Yup.boolean(), - })} - initialValues={{ - wiki_page_url: "", - index_origin: true, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={confluenceCredential.id} - /> -
- - ) : ( - - Please provide your access token in Step 1 first! Once done with that, - you can then specify which Confluence spaces you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Confluence" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/discourse/page.tsx b/web/src/app/admin/connectors/discourse/page.tsx deleted file mode 100644 index 9ba840d3d92..00000000000 --- a/web/src/app/admin/connectors/discourse/page.tsx +++ /dev/null @@ -1,285 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { DiscourseIcon, TrashIcon } from "@/components/icons/icons"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - DiscourseConfig, - DiscourseCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const discourseConnectorIndexingStatuses: ConnectorIndexingStatus< - DiscourseConfig, - DiscourseCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "discourse" - ); - const discourseCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.discourse_api_username - ); - - return ( - <> - {popup} - - This connector allows you to sync all your Discourse Topics into - Danswer. More details on how to setup the Discourse connector can be - found in{" "} - - this guide. - - - - - Step 1: Provide your API Access info - - - {discourseCredential ? ( - <> -
-

Existing API Key:

-

- {discourseCredential.credential_json?.discourse_api_key} -

- -
- - ) : ( - <> - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - discourse_api_username: Yup.string().required( - "Please enter the Username associated with the API key" - ), - discourse_api_key: Yup.string().required( - "Please enter the API key" - ), - })} - initialValues={{ - discourse_api_username: "", - discourse_api_key: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which Categories do you want to make searchable? - - - {discourseConnectorIndexingStatuses.length > 0 && ( - <> - - We pull Topics with new Posts every 10 minutes. - -
- - connectorIndexingStatuses={discourseConnectorIndexingStatuses} - liveCredential={discourseCredential} - getCredential={(credential) => - credential.credential_json.discourse_api_username - } - specialColumns={[ - { - header: "Categories", - key: "categories", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .categories && - ccPairStatus.connector.connector_specific_config.categories - .length > 0 - ? ccPairStatus.connector.connector_specific_config.categories.join( - ", " - ) - : "", - }, - ]} - includeName={true} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (discourseCredential) { - await linkCredential(connectorId, discourseCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {discourseCredential ? ( - <> - -

Create a new Discourse Connector

- - nameBuilder={(values) => - values.categories - ? `${values.base_url}-${values.categories.join("_")}` - : `${values.base_url}-All` - } - source="discourse" - inputType="poll" - formBody={ - <> - - - } - formBodyBuilder={TextArrayFieldBuilder({ - name: "categories", - label: "Categories:", - subtext: - "Specify 0 or more Categories to index. If no Categories are specified, Topics from " + - "all categories will be indexed.", - })} - validationSchema={Yup.object().shape({ - base_url: Yup.string().required( - "Please the base URL of your Discourse site." - ), - categories: Yup.array().of( - Yup.string().required("Category names must be strings") - ), - })} - initialValues={{ - categories: [], - base_url: "", - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={discourseCredential.id} - /> -
- - ) : ( - - Please provide your API Key Info in Step 1 first! Once done with that, - you can then start indexing all your Discourse Topics. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Discourse" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/document360/page.tsx b/web/src/app/admin/connectors/document360/page.tsx deleted file mode 100644 index 85653c639e3..00000000000 --- a/web/src/app/admin/connectors/document360/page.tsx +++ /dev/null @@ -1,277 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, Document360Icon } from "@/components/icons/icons"; // Make sure you have a Document360 icon -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - Document360Config, - Document360CredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; // Modify or create these types as required -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Title, Text, Card, Divider } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const document360ConnectorIndexingStatuses: ConnectorIndexingStatus< - Document360Config, - Document360CredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "document360" - ); - - const document360Credential: - | Credential - | undefined = credentialsData.find( - (credential) => credential.credential_json?.document360_api_token - ); - - return ( - <> - - Step 1: Provide Credentials - - {document360Credential ? ( - <> -
- Existing Document360 API Token: - - {document360Credential.credential_json.document360_api_token} - - -
- - ) : ( - <> - - To use the Document360 connector, you must first provide the API - token and portal ID corresponding to your Document360 setup. See - setup guide{" "} - - here - {" "} - for more detail. - - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - document360_api_token: Yup.string().required( - "Please enter your Document360 API token" - ), - portal_id: Yup.string().required("Please enter your portal ID"), - })} - initialValues={{ - document360_api_token: "", - portal_id: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which categories do you want to make searchable? - - - {document360ConnectorIndexingStatuses.length > 0 && ( - <> - - We index the latest articles from each workspace listed below - regularly. - -
- - connectorIndexingStatuses={document360ConnectorIndexingStatuses} - liveCredential={document360Credential} - getCredential={(credential) => - credential.credential_json.document360_api_token - } - specialColumns={[ - { - header: "Workspace", - key: "workspace", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config.workspace, - }, - { - header: "Categories", - key: "categories", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .categories && - ccPairStatus.connector.connector_specific_config.categories - .length > 0 - ? ccPairStatus.connector.connector_specific_config.categories.join( - ", " - ) - : "", - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (document360Credential) { - await linkCredential(connectorId, document360Credential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {document360Credential ? ( - -

Connect to a New Workspace

- - nameBuilder={(values) => - values.categories - ? `Document360Connector-${ - values.workspace - }-${values.categories.join("_")}` - : `Document360Connector-${values.workspace}` - } - source="document360" - inputType="poll" - formBody={ - <> - - - } - formBodyBuilder={TextArrayFieldBuilder({ - name: "categories", - label: "Categories:", - subtext: - "Specify 0 or more categories to index. For instance, specifying the category " + - "'Help' will cause us to only index all content " + - "within the 'Help' category. " + - "If no categories are specified, all categories in your workspace will be indexed.", - })} - validationSchema={Yup.object().shape({ - workspace: Yup.string().required( - "Please enter the workspace to index" - ), - categories: Yup.array() - .of(Yup.string().required("Category names must be strings")) - .required(), - })} - initialValues={{ - workspace: "", - categories: [], - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={document360Credential.id} - /> -
- ) : ( - - Please provide your Document360 API token and portal ID in Step 1 - first! Once done with that, you can then specify which Document360 - categories you want to make searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Document360" - /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/dropbox/page.tsx b/web/src/app/admin/connectors/dropbox/page.tsx deleted file mode 100644 index a897a34072f..00000000000 --- a/web/src/app/admin/connectors/dropbox/page.tsx +++ /dev/null @@ -1,222 +0,0 @@ -"use client"; - -import { AdminPageTitle } from "@/components/admin/Title"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { DropboxIcon } from "@/components/icons/icons"; -import { LoadingAnimation } from "@/components/Loading"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { TrashIcon } from "@/components/icons/icons"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { usePublicCredentials } from "@/lib/hooks"; -import { - ConnectorIndexingStatus, - Credential, - DropboxConfig, - DropboxCredentialJson, -} from "@/lib/types"; -import { Card, Text, Title } from "@tremor/react"; -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const dropboxConnectorIndexingStatuses: ConnectorIndexingStatus< - DropboxConfig, - DropboxCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "dropbox" - ); - const dropboxCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.dropbox_access_token - ); - - return ( - <> - {popup} - - Provide your API details - - - {dropboxCredential ? ( - <> -
-

Existing API Token:

-

- {dropboxCredential.credential_json?.dropbox_access_token} -

- -
- - ) : ( - <> - - See the Dropbox connector{" "} - - setup guide - {" "} - on the Danswer docs to obtain a Dropbox token. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - dropbox_access_token: Yup.string().required( - "Please enter your Dropbox API token" - ), - })} - initialValues={{ - dropbox_access_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> - - - )} - - {dropboxConnectorIndexingStatuses.length > 0 && ( - <> - - Dropbox indexing status - - - Due to Dropbox access key design, the Dropbox connector will only - re-index files after a new access key is provided and the indexing - process is re-run manually. Check the docs for more information. - -
- - connectorIndexingStatuses={dropboxConnectorIndexingStatuses} - liveCredential={dropboxCredential} - onCredentialLink={async (connectorId) => { - if (dropboxCredential) { - await linkCredential(connectorId, dropboxCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {dropboxCredential && dropboxConnectorIndexingStatuses.length === 0 && ( - <> - -

Create Connection

-

- Press connect below to start the connection to your Dropbox - instance. -

- - nameBuilder={(values) => `Dropbox`} - ccPairNameBuilder={(values) => `Dropbox`} - source="dropbox" - inputType="poll" - formBody={<>} - validationSchema={Yup.object().shape({})} - initialValues={{}} - // refreshFreq={10 * 60} // disabled re-indexing - credentialId={dropboxCredential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- } title="Dropbox" /> -
-
- ); -} diff --git a/web/src/app/admin/connectors/file/page.tsx b/web/src/app/admin/connectors/file/page.tsx deleted file mode 100644 index b2857f66c61..00000000000 --- a/web/src/app/admin/connectors/file/page.tsx +++ /dev/null @@ -1,299 +0,0 @@ -"use client"; - -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; - -import { FileIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { ConnectorIndexingStatus, FileConfig } from "@/lib/types"; -import { createCredential, linkCredential } from "@/lib/credential"; -import { useState } from "react"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { createConnector, runConnector } from "@/lib/connector"; -import { Spinner } from "@/components/Spinner"; -import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable"; -import { LoadingAnimation } from "@/components/Loading"; -import { Form, Formik } from "formik"; -import { - BooleanFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { FileUpload } from "@/components/admin/connectors/FileUpload"; -import { getNameFromPath } from "@/lib/fileUtils"; -import { Button, Card, Divider, Text } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; -import IsPublicField from "@/components/admin/connectors/IsPublicField"; -import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; - -const Main = () => { - const [selectedFiles, setSelectedFiles] = useState([]); - const [filesAreUploading, setFilesAreUploading] = useState(false); - const { popup, setPopup } = usePopup(); - - const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); - - const { mutate } = useSWRConfig(); - - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - if (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) { - return ; - } - - const fileIndexingStatuses: ConnectorIndexingStatus[] = - connectorIndexingStatuses?.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "file" - ) ?? []; - - return ( -
- {popup} - {filesAreUploading && } - - Specify files below, click the Upload button, and the contents of - these files will be searchable via Danswer! Currently supported file - types include .txt, .pdf, .docx, .pptx,{" "} - .xlsx, .csv, .md, .mdx, .conf,{" "} - .log, .json, .tsv, .xml, .yml,{" "} - .yaml, .eml, .epub, and finally .zip files - (containing supported file types). - - - NOTE: if the original document is accessible via a link, you can - add a line at the very beginning of the file that looks like: -
-
- #DANSWER_METADATA={"{"}"link": "{""}"{"}"} -
-
{" "} - where {""} is the link to the file. This will enable - Danswer to link users to the original document when they click on the - search result. More details on this can be found in the{" "} - - documentation. - -
-
-
- - { - const uploadCreateAndTriggerConnector = async () => { - const formData = new FormData(); - - selectedFiles.forEach((file) => { - formData.append("files", file); - }); - - const response = await fetch( - "/api/manage/admin/connector/file/upload", - { method: "POST", body: formData } - ); - const responseJson = await response.json(); - if (!response.ok) { - setPopup({ - message: `Unable to upload files - ${responseJson.detail}`, - type: "error", - }); - return; - } - - const filePaths = responseJson.file_paths as string[]; - const [connectorErrorMsg, connector] = - await createConnector({ - name: "FileConnector-" + Date.now(), - source: "file", - input_type: "load_state", - connector_specific_config: { - file_locations: filePaths, - }, - refresh_freq: null, - prune_freq: 0, - disabled: false, - }); - if (connectorErrorMsg || !connector) { - setPopup({ - message: `Unable to create connector - ${connectorErrorMsg}`, - type: "error", - }); - return; - } - - // Since there is no "real" credential associated with a file connector - // we create a dummy one here so that we can associate the CC Pair with a - // user. This is needed since the user for a CC Pair is found via the credential - // associated with it. - const createCredentialResponse = await createCredential({ - credential_json: {}, - admin_public: true, - }); - if (!createCredentialResponse.ok) { - const errorMsg = await createCredentialResponse.text(); - setPopup({ - message: `Error creating credential for CC Pair - ${errorMsg}`, - type: "error", - }); - formikHelpers.setSubmitting(false); - return; - } - const credentialId = (await createCredentialResponse.json()) - .id; - - const credentialResponse = await linkCredential( - connector.id, - credentialId, - values.name, - values.is_public - ); - if (!credentialResponse.ok) { - const credentialResponseJson = - await credentialResponse.json(); - setPopup({ - message: `Unable to link connector to credential - ${credentialResponseJson.detail}`, - type: "error", - }); - return; - } - - const runConnectorErrorMsg = await runConnector( - connector.id, - [0] - ); - if (runConnectorErrorMsg) { - setPopup({ - message: `Unable to run connector - ${runConnectorErrorMsg}`, - type: "error", - }); - return; - } - - mutate("/api/manage/admin/connector/indexing-status"); - setSelectedFiles([]); - formikHelpers.resetForm(); - setPopup({ - type: "success", - message: "Successfully uploaded files!", - }); - }; - - setFilesAreUploading(true); - try { - await uploadCreateAndTriggerConnector(); - } catch (e) { - console.log("Failed to index filels: ", e); - } - setFilesAreUploading(false); - }} - > - {({ values, isSubmitting }) => ( -
-

- Upload Files -

- - -

Files:

- - - {isPaidEnterpriseFeaturesEnabled && ( - <> - - - - - )} - -
- -
- - )} -
-
-
-
- - {fileIndexingStatuses.length > 0 && ( -
- -

Indexed Files

- - connectorIndexingStatuses={fileIndexingStatuses} - specialColumns={[ - { - header: "File Names", - key: "file_names", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config.file_locations - .map(getNameFromPath) - .join(", "), - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- )} -
- ); -}; - -export default function File() { - return ( -
-
- -
- - } title="File" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/github/page.tsx b/web/src/app/admin/connectors/github/page.tsx deleted file mode 100644 index 6d7e915d335..00000000000 --- a/web/src/app/admin/connectors/github/page.tsx +++ /dev/null @@ -1,280 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GithubIcon, TrashIcon } from "@/components/icons/icons"; -import { - BooleanFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { - GithubConfig, - GithubCredentialJson, - Credential, - ConnectorIndexingStatus, -} from "@/lib/types"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { LoadingAnimation } from "@/components/Loading"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const githubConnectorIndexingStatuses: ConnectorIndexingStatus< - GithubConfig, - GithubCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "github" - ); - const githubCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.github_access_token - ); - - return ( - <> - - Step 1: Provide your access token - - {githubCredential ? ( - <> - {" "} -
-

Existing Access Token:

-

- {githubCredential.credential_json.github_access_token} -

{" "} - -
- - ) : ( - <> - - If you don't have an access token, read the guide{" "} - - here - {" "} - on how to get one from Github. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - github_access_token: Yup.string().required( - "Please enter the access token for Github" - ), - })} - initialValues={{ - github_access_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which repositories do you want to make searchable? - - - {githubConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest Pull Requests and/or Issues from each repository - listed below every 10 minutes. - -
- - connectorIndexingStatuses={githubConnectorIndexingStatuses} - liveCredential={githubCredential} - getCredential={(credential) => - credential.credential_json.github_access_token - } - onCredentialLink={async (connectorId) => { - if (githubCredential) { - await linkCredential(connectorId, githubCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Repository", - key: "repository", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.repo_owner}/${connectorConfig.repo_name}`; - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - - )} - - {githubCredential ? ( - -

Connect to a New Repository

- - - The Github connector can index Pull Requests and Issues. - - - - nameBuilder={(values) => - `GithubConnector-${values.repo_owner}/${values.repo_name}` - } - ccPairNameBuilder={(values) => - `${values.repo_owner}/${values.repo_name}` - } - source="github" - inputType="poll" - formBody={ - <> - - - - - - } - validationSchema={Yup.object().shape({ - repo_owner: Yup.string().required( - "Please enter the owner of the repository to index e.g. danswer-ai" - ), - repo_name: Yup.string().required( - "Please enter the name of the repository to index e.g. danswer " - ), - include_prs: Yup.boolean().required(), - include_issues: Yup.boolean().required(), - })} - validate={(values) => { - if (values.include_prs || values.include_issues) { - return {} as Record; - } - return { - include_issues: - "Please select at least one of Pull Requests or Issues", - }; - }} - initialValues={{ - repo_owner: "", - repo_name: "", - include_prs: true, - include_issues: true, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={githubCredential.id} - /> -
- ) : ( - - Please provide your access token in Step 1 first! Once done with that, - you can then specify which Github repositories you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Github PRs + Issues" - /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/gitlab/page.tsx b/web/src/app/admin/connectors/gitlab/page.tsx deleted file mode 100644 index 595cd575f76..00000000000 --- a/web/src/app/admin/connectors/gitlab/page.tsx +++ /dev/null @@ -1,265 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GitlabIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { - GitlabConfig, - GitlabCredentialJson, - Credential, - ConnectorIndexingStatus, -} from "@/lib/types"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { LoadingAnimation } from "@/components/Loading"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const gitlabConnectorIndexingStatuses: ConnectorIndexingStatus< - GitlabConfig, - GitlabCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "gitlab" - ); - const gitlabCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.gitlab_access_token - ); - - return ( - <> - - Step 1: Provide your access token - - {gitlabCredential ? ( - <> - {" "} -
-

Existing Access Token:

-

- {gitlabCredential.credential_json.gitlab_access_token} -

{" "} - -
- - ) : ( - <> - - If you don't have an access token, read the guide{" "} - - here - {" "} - on how to get one from Gitlab. - - - - formBody={ - <> - - If you are using GitLab Cloud, keep the default value below - - - - - - } - validationSchema={Yup.object().shape({ - gitlab_url: Yup.string().default("https://gitlab.com"), - gitlab_access_token: Yup.string().required( - "Please enter the access token for Gitlab" - ), - })} - initialValues={{ - gitlab_access_token: "", - gitlab_url: "https://gitlab.com", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which repositories do you want to make searchable? - - - {gitlabConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest Pull Requests from each project listed below - every 10 minutes. - -
- - connectorIndexingStatuses={gitlabConnectorIndexingStatuses} - liveCredential={gitlabCredential} - getCredential={(credential) => - credential.credential_json.gitlab_access_token - } - onCredentialLink={async (connectorId) => { - if (gitlabCredential) { - await linkCredential(connectorId, gitlabCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Project", - key: "project", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.project_owner}/${connectorConfig.project_name}`; - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - - )} - - {gitlabCredential ? ( - -

Connect to a New Project

- - nameBuilder={(values) => - `GitlabConnector-${values.project_owner}/${values.project_name}` - } - ccPairNameBuilder={(values) => - `${values.project_owner}/${values.project_name}` - } - source="gitlab" - inputType="poll" - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - project_owner: Yup.string().required( - "Please enter the owner of the project to index e.g. danswer-ai" - ), - project_name: Yup.string().required( - "Please enter the name of the project to index e.g. danswer " - ), - include_mrs: Yup.boolean().required(), - include_issues: Yup.boolean().required(), - })} - initialValues={{ - project_owner: "", - project_name: "", - include_mrs: true, - include_issues: true, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={gitlabCredential.id} - /> -
- ) : ( - - Please provide your access token in Step 1 first! Once done with that, - you can then specify which Gitlab repositories you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Gitlab MRs + Issues" - /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx b/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx deleted file mode 100644 index f9571204d52..00000000000 --- a/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx +++ /dev/null @@ -1,127 +0,0 @@ -import { BasicTable } from "@/components/admin/connectors/BasicTable"; -import { PopupSpec } from "@/components/admin/connectors/Popup"; -import { StatusRow } from "@/components/admin/connectors/table/ConnectorsTable"; -import { deleteConnector } from "@/lib/connector"; -import { - GmailConfig, - ConnectorIndexingStatus, - GmailCredentialJson, -} from "@/lib/types"; -import { useSWRConfig } from "swr"; -import { DeleteColumn } from "@/components/admin/connectors/table/DeleteColumn"; -import { - Table, - TableHead, - TableRow, - TableHeaderCell, - TableBody, - TableCell, -} from "@tremor/react"; - -interface TableProps { - gmailConnectorIndexingStatuses: ConnectorIndexingStatus< - GmailConfig, - GmailCredentialJson - >[]; - setPopup: (popupSpec: PopupSpec | null) => void; -} - -export const GmailConnectorsTable = ({ - gmailConnectorIndexingStatuses: gmailConnectorIndexingStatuses, - setPopup, -}: TableProps) => { - const { mutate } = useSWRConfig(); - - // Sorting to maintain a consistent ordering - const sortedGmailConnectorIndexingStatuses = [ - ...gmailConnectorIndexingStatuses, - ]; - sortedGmailConnectorIndexingStatuses.sort( - (a, b) => a.connector.id - b.connector.id - ); - - return ( -
-
- - - Status - Delete - - - - {sortedGmailConnectorIndexingStatuses.map( - (connectorIndexingStatus) => { - return ( - - - { - mutate("/api/manage/admin/connector/indexing-status"); - }} - /> - - - - mutate("/api/manage/admin/connector/indexing-status") - } - /> - - - ); - } - )} - -
-
- ); - - return ( - ({ - status: ( - { - mutate("/api/manage/admin/connector/indexing-status"); - }} - /> - ), - delete: ( - - mutate("/api/manage/admin/connector/indexing-status") - } - /> - ), - }) - )} - /> - ); -}; diff --git a/web/src/app/admin/connectors/gmail/auth/callback/route.ts b/web/src/app/admin/connectors/gmail/auth/callback/route.ts deleted file mode 100644 index 71e28f59708..00000000000 --- a/web/src/app/admin/connectors/gmail/auth/callback/route.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { getDomain } from "@/lib/redirectSS"; -import { buildUrl } from "@/lib/utilsSS"; -import { NextRequest, NextResponse } from "next/server"; -import { cookies } from "next/headers"; -import { GMAIL_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; -import { processCookies } from "@/lib/userSS"; - -export const GET = async (request: NextRequest) => { - // Wrapper around the FastAPI endpoint /connectors/gmail/callback, - // which adds back a redirect to the Gmail admin page. - const url = new URL(buildUrl("/manage/connector/gmail/callback")); - url.search = request.nextUrl.search; - - const response = await fetch(url.toString(), { - headers: { - cookie: processCookies(cookies()), - }, - }); - - if (!response.ok) { - console.log("Error in Gmail callback:", (await response.json()).detail); - return NextResponse.redirect(new URL("/auth/error", getDomain(request))); - } - - if ( - cookies().get(GMAIL_AUTH_IS_ADMIN_COOKIE_NAME)?.value?.toLowerCase() === - "true" - ) { - return NextResponse.redirect( - new URL("/admin/connectors/gmail", getDomain(request)) - ); - } - return NextResponse.redirect(new URL("/user/connectors", getDomain(request))); -}; diff --git a/web/src/app/admin/connectors/gmail/page.tsx b/web/src/app/admin/connectors/gmail/page.tsx deleted file mode 100644 index f0800d293e4..00000000000 --- a/web/src/app/admin/connectors/gmail/page.tsx +++ /dev/null @@ -1,276 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GmailIcon } from "@/components/icons/icons"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - ConnectorIndexingStatus, - Credential, - GmailCredentialJson, - GmailServiceAccountCredentialJson, - GmailConfig, -} from "@/lib/types"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { GmailConnectorsTable } from "./GmailConnectorsTable"; -import { gmailConnectorNameBuilder } from "./utils"; -import { GmailOAuthSection, GmailJsonUploadSection } from "./Credential"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Divider, Text, Title } from "@tremor/react"; - -interface GmailConnectorManagementProps { - gmailPublicCredential?: Credential; - gmailServiceAccountCredential?: Credential; - gmailConnectorIndexingStatus: ConnectorIndexingStatus< - GmailConfig, - GmailCredentialJson - > | null; - gmailConnectorIndexingStatuses: ConnectorIndexingStatus< - GmailConfig, - GmailCredentialJson - >[]; - credentialIsLinked: boolean; - setPopup: (popupSpec: PopupSpec | null) => void; -} - -const GmailConnectorManagement = ({ - gmailPublicCredential: gmailPublicCredential, - gmailServiceAccountCredential: gmailServiceAccountCredential, - 
gmailConnectorIndexingStatuses: gmailConnectorIndexingStatuses, - setPopup, -}: GmailConnectorManagementProps) => { - const { mutate } = useSWRConfig(); - - const liveCredential = gmailPublicCredential || gmailServiceAccountCredential; - if (!liveCredential) { - return ( - - Please authenticate with Gmail as described in Step 2! Once done with - that, you can then move on to enable this connector. - - ); - } - - return ( -
- -
- {gmailConnectorIndexingStatuses.length > 0 ? ( - <> - Check out the{" "} - - status page - {" "} - for the latest indexing status. We fetch the latest emails from - Gmail every 10 minutes. - ) : (

- Fill out the form below to create a connector. We will refresh the - latest documents from Gmail every 10 minutes. -

- )} -
-
- {gmailConnectorIndexingStatuses.length > 0 && ( - <> -
Existing Connectors:
- - - - )} - - {gmailConnectorIndexingStatuses.length > 0 && ( -

Add New Connector:

- )} - - - nameBuilder={gmailConnectorNameBuilder} - source="gmail" - inputType="poll" - formBody={null} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={liveCredential.id} - /> - -
- ); -}; - -const Main = () => { - const { - data: appCredentialData, - isLoading: isAppCredentialLoading, - error: isAppCredentialError, - } = useSWR<{ client_id: string }>( - "/api/manage/admin/connector/gmail/app-credential", - errorHandlingFetcher - ); - const { - data: serviceAccountKeyData, - isLoading: isServiceAccountKeyLoading, - error: isServiceAccountKeyError, - } = useSWR<{ service_account_email: string }>( - "/api/manage/admin/connector/gmail/service-account-key", - errorHandlingFetcher - ); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - const { popup, setPopup } = usePopup(); - - const appCredentialSuccessfullyFetched = - appCredentialData || - (isAppCredentialError && isAppCredentialError.status === 404); - const serviceAccountKeySuccessfullyFetched = - serviceAccountKeyData || - (isServiceAccountKeyError && isServiceAccountKeyError.status === 404); - - if ( - (!appCredentialSuccessfullyFetched && isAppCredentialLoading) || - (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) || - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ( -
- -
- ); - } - - if (credentialsError || !credentialsData) { - return ( -
-
Failed to load credentials.
-
- ); - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( -
-
Failed to load connectors.
-
- ); - } - - if ( - !appCredentialSuccessfullyFetched || - !serviceAccountKeySuccessfullyFetched - ) { - return ( -
-
- Error loading Gmail app credentials. Contact an administrator. -
-
- ); - } - - const gmailPublicCredential: Credential | undefined = - credentialsData.find( - (credential) => - credential.credential_json?.gmail_tokens && credential.admin_public - ); - const gmailServiceAccountCredential: - | Credential - | undefined = credentialsData.find( - (credential) => credential.credential_json?.gmail_service_account_key - ); - const gmailConnectorIndexingStatuses: ConnectorIndexingStatus< - GmailConfig, - GmailCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "gmail" - ); - const gmailConnectorIndexingStatus = gmailConnectorIndexingStatuses[0]; - - const credentialIsLinked = - (gmailConnectorIndexingStatus !== undefined && - gmailPublicCredential !== undefined && - gmailConnectorIndexingStatus.connector.credential_ids.includes( - gmailPublicCredential.id - )) || - (gmailConnectorIndexingStatus !== undefined && - gmailServiceAccountCredential !== undefined && - gmailConnectorIndexingStatus.connector.credential_ids.includes( - gmailServiceAccountCredential.id - )); - - return ( - <> - {popup} - - Step 1: Provide your Credentials - - - - - Step 2: Authenticate with Danswer - - 0} - /> - - - Step 3: Start Indexing! - - - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Gmail" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/gmail/utils.ts b/web/src/app/admin/connectors/gmail/utils.ts deleted file mode 100644 index e7f8a24b3f1..00000000000 --- a/web/src/app/admin/connectors/gmail/utils.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { GmailConfig } from "@/lib/types"; - -export const gmailConnectorNameBuilder = (values: GmailConfig) => - "GmailConnector"; diff --git a/web/src/app/admin/connectors/gong/page.tsx b/web/src/app/admin/connectors/gong/page.tsx deleted file mode 100644 index 5fda45d517e..00000000000 --- a/web/src/app/admin/connectors/gong/page.tsx +++ /dev/null @@ -1,269 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GongIcon, TrashIcon } from "@/components/icons/icons"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - GongConfig, - GongCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const gongConnectorIndexingStatuses: ConnectorIndexingStatus< - GongConfig, - GongCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "gong" - ); - const gongCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.gong_access_key - ); - - return ( - <> - {popup} - - This connector allows you to sync all your Gong Transcripts into - Danswer. More details on how to setup the Gong connector can be found in{" "} - - this guide. - - - - - Step 1: Provide your API Access info - - - {gongCredential ? ( - <> -
-

Existing Access Key Secret:

-

- {gongCredential.credential_json?.gong_access_key_secret} -

- -
- - ) : ( - <> - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - gong_access_key: Yup.string().required( - "Please enter your Gong Access Key" - ), - gong_access_key_secret: Yup.string().required( - "Please enter your Gong Access Key Secret" - ), - })} - initialValues={{ - gong_access_key: "", - gong_access_key_secret: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which Workspaces do you want to make searchable? - - - {gongConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest transcript every 10 minutes. - -
- - connectorIndexingStatuses={gongConnectorIndexingStatuses} - liveCredential={gongCredential} - getCredential={(credential) => - credential.credential_json.gong_access_key - } - specialColumns={[ - { - header: "Workspaces", - key: "workspaces", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .workspaces && - ccPairStatus.connector.connector_specific_config.workspaces - .length > 0 - ? ccPairStatus.connector.connector_specific_config.workspaces.join( - ", " - ) - : "", - }, - ]} - includeName={true} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (gongCredential) { - await linkCredential(connectorId, gongCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {gongCredential ? ( - <> - -

Create a new Gong Connector

- - nameBuilder={(values) => - values.workspaces - ? `GongConnector-${values.workspaces.join("_")}` - : `GongConnector-All` - } - source="gong" - inputType="poll" - formBodyBuilder={TextArrayFieldBuilder({ - name: "workspaces", - label: "Workspaces:", - subtext: - "Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace " + - "name from Gong. If no workspaces are specified, transcripts from all workspaces will " + - "be indexed.", - })} - validationSchema={Yup.object().shape({ - workspaces: Yup.array().of( - Yup.string().required("Workspace names must be strings") - ), - })} - initialValues={{ - workspaces: [], - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={gongCredential.id} - /> -
- - ) : ( - - Please provide your API Access Info in Step 1 first! Once done with - that, you can then start indexing all your Gong transcripts. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Gong" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/google-drive/ConnectorEditPopup.tsx b/web/src/app/admin/connectors/google-drive/ConnectorEditPopup.tsx deleted file mode 100644 index 60e3bc05e02..00000000000 --- a/web/src/app/admin/connectors/google-drive/ConnectorEditPopup.tsx +++ /dev/null @@ -1,82 +0,0 @@ -import { UpdateConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { - BooleanFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { XIcon } from "@/components/icons/icons"; -import { Connector, GoogleDriveConfig } from "@/lib/types"; -import * as Yup from "yup"; -import { googleDriveConnectorNameBuilder } from "./utils"; -import { Modal } from "@/components/Modal"; -import { Divider, Text } from "@tremor/react"; - -interface Props { - existingConnector: Connector; - onSubmit: () => void; -} - -export const ConnectorEditPopup = ({ existingConnector, onSubmit }: Props) => { - return ( - -
-

- Update Google Drive Connector -
- -
-

- - - Modify the selected Google Drive connector by adjusting the values - below! - - - - - - nameBuilder={googleDriveConnectorNameBuilder} - existingConnector={existingConnector} - formBodyBuilder={(values) => ( -
- {TextArrayFieldBuilder({ - name: "folder_paths", - label: "Folder Paths", - })(values)} - - - -
- )} - validationSchema={Yup.object().shape({ - folder_paths: Yup.array() - .of( - Yup.string().required( - "Please specify a folder path for your google drive e.g. 'Engineering/Materials'" - ) - ) - .required(), - include_shared: Yup.boolean().required(), - follow_shortcuts: Yup.boolean().required(), - only_org_public: Yup.boolean().required(), - })} - onSubmit={onSubmit} - /> -
-
- ); -}; diff --git a/web/src/app/admin/connectors/google-drive/GoogleDriveConnectorsTable.tsx b/web/src/app/admin/connectors/google-drive/GoogleDriveConnectorsTable.tsx deleted file mode 100644 index 4da18f682d2..00000000000 --- a/web/src/app/admin/connectors/google-drive/GoogleDriveConnectorsTable.tsx +++ /dev/null @@ -1,300 +0,0 @@ -import { Button } from "@/components/Button"; -import { BasicTable } from "@/components/admin/connectors/BasicTable"; -import { PopupSpec } from "@/components/admin/connectors/Popup"; -import { StatusRow } from "@/components/admin/connectors/table/ConnectorsTable"; -import { EditIcon } from "@/components/icons/icons"; -import { deleteConnector } from "@/lib/connector"; -import { - GoogleDriveConfig, - ConnectorIndexingStatus, - GoogleDriveCredentialJson, -} from "@/lib/types"; -import { useSWRConfig } from "swr"; -import { useState } from "react"; -import { ConnectorEditPopup } from "./ConnectorEditPopup"; -import { DeleteColumn } from "@/components/admin/connectors/table/DeleteColumn"; -import { - Table, - TableHead, - TableRow, - TableHeaderCell, - TableBody, - TableCell, -} from "@tremor/react"; - -interface EditableColumnProps { - connectorIndexingStatus: ConnectorIndexingStatus< - GoogleDriveConfig, - GoogleDriveCredentialJson - >; -} - -const EditableColumn = ({ connectorIndexingStatus }: EditableColumnProps) => { - const { mutate } = useSWRConfig(); - const [isEditing, setIsEditing] = useState(false); - - return ( - <> - {isEditing && ( - { - setIsEditing(false); - mutate("/api/manage/admin/connector/indexing-status"); - }} - /> - )} -
-
{ - setIsEditing(true); - }} - className="cursor-pointer" - > -
- -
-
-
- - ); -}; - -interface TableProps { - googleDriveConnectorIndexingStatuses: ConnectorIndexingStatus< - GoogleDriveConfig, - GoogleDriveCredentialJson - >[]; - setPopup: (popupSpec: PopupSpec | null) => void; -} - -export const GoogleDriveConnectorsTable = ({ - googleDriveConnectorIndexingStatuses, - setPopup, -}: TableProps) => { - const { mutate } = useSWRConfig(); - - // Sorting to maintain a consistent ordering - const sortedGoogleDriveConnectorIndexingStatuses = [ - ...googleDriveConnectorIndexingStatuses, - ]; - sortedGoogleDriveConnectorIndexingStatuses.sort( - (a, b) => a.connector.id - b.connector.id - ); - - return ( -
- - - - Edit - Folder Paths - Include Shared - Follow Shortcuts - Only Org Public - Status - Delete - - - - {sortedGoogleDriveConnectorIndexingStatuses.map( - (connectorIndexingStatus) => { - return ( - - - - - - {( - connectorIndexingStatus.connector - .connector_specific_config.folder_paths || [] - ).length > 0 ? ( -
- {( - connectorIndexingStatus.connector - .connector_specific_config.folder_paths || [] - ).map((path) => ( -
- - {path} -
- ))} -
- ) : ( - All Folders - )} -
- -
- {connectorIndexingStatus.connector - .connector_specific_config.include_shared ? ( - Yes - ) : ( - No - )} -
-
- -
- {connectorIndexingStatus.connector - .connector_specific_config.follow_shortcuts ? ( - Yes - ) : ( - No - )} -
-
- -
- {connectorIndexingStatus.connector - .connector_specific_config.only_org_public ? ( - Yes - ) : ( - No - )} -
-
- - { - mutate("/api/manage/admin/connector/indexing-status"); - }} - /> - - - - mutate("/api/manage/admin/connector/indexing-status") - } - /> - -
- ); - } - )} -
-
-
- ); - - return ( - ({ - edit: ( - - ), - folder_paths: - ( - connectorIndexingStatus.connector.connector_specific_config - .folder_paths || [] - ).length > 0 ? ( -
- {( - connectorIndexingStatus.connector.connector_specific_config - .folder_paths || [] - ).map((path) => ( -
- - {path} -
- ))} -
- ) : ( - All Folders - ), - include_shared: ( -
- {connectorIndexingStatus.connector.connector_specific_config - .include_shared ? ( - Yes - ) : ( - No - )} -
- ), - follow_shortcuts: ( -
- {connectorIndexingStatus.connector.connector_specific_config - .follow_shortcuts ? ( - Yes - ) : ( - No - )} -
- ), - only_org_public: ( -
- {connectorIndexingStatus.connector.connector_specific_config - .only_org_public ? ( - Yes - ) : ( - No - )} -
- ), - status: ( - { - mutate("/api/manage/admin/connector/indexing-status"); - }} - /> - ), - delete: ( - - mutate("/api/manage/admin/connector/indexing-status") - } - /> - ), - }) - )} - /> - ); -}; diff --git a/web/src/app/admin/connectors/google-drive/page.tsx b/web/src/app/admin/connectors/google-drive/page.tsx deleted file mode 100644 index 121d8af9d0a..00000000000 --- a/web/src/app/admin/connectors/google-drive/page.tsx +++ /dev/null @@ -1,427 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GoogleDriveIcon } from "@/components/icons/icons"; -import useSWR, { useSWRConfig } from "swr"; -import { FetchError, errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - ConnectorIndexingStatus, - Credential, - GoogleDriveConfig, - GoogleDriveCredentialJson, - GoogleDriveServiceAccountCredentialJson, -} from "@/lib/types"; -import { linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { - BooleanFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { GoogleDriveConnectorsTable } from "./GoogleDriveConnectorsTable"; -import { googleDriveConnectorNameBuilder } from "./utils"; -import { DriveOAuthSection, DriveJsonUploadSection } from "./Credential"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Divider, Text, Title } from "@tremor/react"; - -interface GoogleDriveConnectorManagementProps { - googleDrivePublicCredential?: Credential; - googleDriveServiceAccountCredential?: Credential; - googleDriveConnectorIndexingStatus: ConnectorIndexingStatus< - GoogleDriveConfig, - GoogleDriveCredentialJson - > | null; - googleDriveConnectorIndexingStatuses: ConnectorIndexingStatus< - GoogleDriveConfig, - GoogleDriveCredentialJson - >[]; - credentialIsLinked: boolean; - setPopup: (popupSpec: PopupSpec | null) => void; -} - -const GoogleDriveConnectorManagement = ({ - googleDrivePublicCredential, - googleDriveServiceAccountCredential, - googleDriveConnectorIndexingStatus, - googleDriveConnectorIndexingStatuses, - credentialIsLinked, - setPopup, -}: GoogleDriveConnectorManagementProps) => { - const { mutate } = useSWRConfig(); - - const liveCredential = - googleDrivePublicCredential || googleDriveServiceAccountCredential; - if (!liveCredential) { - return ( - - Please authenticate with Google Drive as described in Step 2! Once done - with that, you can then move on to enable this connector. - - ); - } - - // NOTE: if the connector has no credential linked, then it will not be - // returned by the indexing-status API - // if (!googleDriveConnectorIndexingStatus) { - // return ( - // <> - //

- // Fill out the form below to create a connector. We will refresh the - // latest documents from Google Drive every 10 minutes. - //

- //
- //

Add Connector

- // - // nameBuilder={googleDriveConnectorNameBuilder} - // source="google_drive" - // inputType="poll" - // formBodyBuilder={(values) => ( - //
- // {TextArrayFieldBuilder({ - // name: "folder_paths", - // label: "Folder Paths", - // subtext: - // "Specify 0 or more folder paths to index! For example, specifying the path " + - // "'Engineering/Materials' will cause us to only index all files contained " + - // "within the 'Materials' folder within the 'Engineering' folder. " + - // "If no folder paths are specified, we will index all documents in your drive.", - // })(values)} - // - //
- // )} - // validationSchema={Yup.object().shape({ - // folder_paths: Yup.array() - // .of( - // Yup.string().required( - // "Please specify a folder path for your google drive e.g. 'Engineering/Materials'" - // ) - // ) - // .required(), - // include_shared: Yup.boolean().required(), - // })} - // initialValues={{ - // folder_paths: [], - // }} - // refreshFreq={10 * 60} // 10 minutes - // onSubmit={async (isSuccess, responseJson) => { - // if (isSuccess && responseJson) { - // await linkCredential( - // responseJson.id, - // googleDrivePublicCredential.id - // ); - // mutate("/api/manage/admin/connector/indexing-status"); - // } - // }} - // /> - //
- // - // ); - // } - - // If the connector has no credential, we will just hit the ^ section. - // Leaving this in for now in case we want to change this behavior later - // if (!credentialIsLinked) { - // <> - //

- // Click the button below to link your credentials! Once this is done, all - // public documents in your Google Drive will be searchable. We will - // refresh the latest documents every 10 minutes. - //

- // - // ; - // } - - return ( -
- -
- {googleDriveConnectorIndexingStatuses.length > 0 ? ( - <> - Check out the{" "} - - status page - {" "} - for the latest indexing status. We fetch the latest documents from - Google Drive every 10 minutes. - ) : (

- Fill out the form below to create a connector. We will refresh the - latest documents from Google Drive every 10 minutes. -

- )} -
-
- {googleDriveConnectorIndexingStatuses.length > 0 && ( - <> -
Existing Connectors:
- - - - )} - - {googleDriveConnectorIndexingStatuses.length > 0 && ( -

Add New Connector:

- )} - - - nameBuilder={googleDriveConnectorNameBuilder} - source="google_drive" - inputType="poll" - formBodyBuilder={(values) => ( - <> - {TextArrayFieldBuilder({ - name: "folder_paths", - label: "Folder Paths", - subtext: - "Specify 0 or more folder paths to index! For example, specifying the path " + - "'Engineering/Materials' will cause us to only index all files contained " + - "within the 'Materials' folder within the 'Engineering' folder. " + - "If no folder paths are specified, we will index all documents in your drive.", - })(values)} - - - - - )} - validationSchema={Yup.object().shape({ - folder_paths: Yup.array() - .of( - Yup.string().required( - "Please specify a folder path for your google drive e.g. 'Engineering/Materials'" - ) - ) - .required(), - include_shared: Yup.boolean().required(), - follow_shortcuts: Yup.boolean().required(), - only_org_public: Yup.boolean().required(), - })} - initialValues={{ - folder_paths: [], - include_shared: false, - follow_shortcuts: false, - only_org_public: false, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={liveCredential.id} - /> - -
- ); -}; - -const Main = () => { - const { - data: appCredentialData, - isLoading: isAppCredentialLoading, - error: isAppCredentialError, - } = useSWR<{ client_id: string }, FetchError>( - "/api/manage/admin/connector/google-drive/app-credential", - errorHandlingFetcher - ); - const { - data: serviceAccountKeyData, - isLoading: isServiceAccountKeyLoading, - error: isServiceAccountKeyError, - } = useSWR<{ service_account_email: string }, FetchError>( - "/api/manage/admin/connector/google-drive/service-account-key", - errorHandlingFetcher - ); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[], FetchError>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - const { popup, setPopup } = usePopup(); - - const appCredentialSuccessfullyFetched = - appCredentialData || - (isAppCredentialError && isAppCredentialError.status === 404); - const serviceAccountKeySuccessfullyFetched = - serviceAccountKeyData || - (isServiceAccountKeyError && isServiceAccountKeyError.status === 404); - - if ( - (!appCredentialSuccessfullyFetched && isAppCredentialLoading) || - (!serviceAccountKeySuccessfullyFetched && isServiceAccountKeyLoading) || - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ( -
- -
- ); - } - - if (credentialsError || !credentialsData) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ; - } - - if ( - !appCredentialSuccessfullyFetched || - !serviceAccountKeySuccessfullyFetched - ) { - return ( - - ); - } - - const googleDrivePublicCredential: - | Credential - | undefined = credentialsData.find( - (credential) => - credential.credential_json?.google_drive_tokens && credential.admin_public - ); - const googleDriveServiceAccountCredential: - | Credential - | undefined = credentialsData.find( - (credential) => credential.credential_json?.google_drive_service_account_key - ); - const googleDriveConnectorIndexingStatuses: ConnectorIndexingStatus< - GoogleDriveConfig, - GoogleDriveCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "google_drive" - ); - const googleDriveConnectorIndexingStatus = - googleDriveConnectorIndexingStatuses[0]; - - const credentialIsLinked = - (googleDriveConnectorIndexingStatus !== undefined && - googleDrivePublicCredential !== undefined && - googleDriveConnectorIndexingStatus.connector.credential_ids.includes( - googleDrivePublicCredential.id - )) || - (googleDriveConnectorIndexingStatus !== undefined && - googleDriveServiceAccountCredential !== undefined && - googleDriveConnectorIndexingStatus.connector.credential_ids.includes( - googleDriveServiceAccountCredential.id - )); - - return ( - <> - {popup} - - Step 1: Provide your Credentials - - - - - Step 2: Authenticate with Danswer - - 0} - /> - - - Step 3: Start Indexing! - - - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Google Drive" - /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/google-drive/utils.ts b/web/src/app/admin/connectors/google-drive/utils.ts deleted file mode 100644 index d095ec27ed5..00000000000 --- a/web/src/app/admin/connectors/google-drive/utils.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { GoogleDriveConfig } from "@/lib/types"; - -export const googleDriveConnectorNameBuilder = (values: GoogleDriveConfig) => - `GoogleDriveConnector-${ - values.folder_paths && values.folder_paths.join("_") - }-${values.include_shared ? "shared" : "not-shared"}-${ - values.only_org_public ? "org-public" : "all" - }-${ - values.follow_shortcuts ? "follow-shortcuts" : "do-not-follow-shortcuts" - }`; diff --git a/web/src/app/admin/connectors/google-sites/page.tsx b/web/src/app/admin/connectors/google-sites/page.tsx deleted file mode 100644 index 20728633a59..00000000000 --- a/web/src/app/admin/connectors/google-sites/page.tsx +++ /dev/null @@ -1,249 +0,0 @@ -"use client"; - -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; - -import { LoadingAnimation } from "@/components/Loading"; -import { GoogleSitesIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { ConnectorIndexingStatus, GoogleSitesConfig } from "@/lib/types"; -import { Form, Formik } from "formik"; -import { useState } from "react"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { createConnector, runConnector } from "@/lib/connector"; -import { linkCredential } from "@/lib/credential"; -import { FileUpload } from "@/components/admin/connectors/FileUpload"; -import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable"; -import { Spinner } from "@/components/Spinner"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Button, Card, Text, Title } from "@tremor/react"; - -export default function GoogleSites() { - const { mutate } = useSWRConfig(); - const [selectedFiles, setSelectedFiles] = useState([]); - const [filesAreUploading, setFilesAreUploading] = useState(false); - const { popup, setPopup } = usePopup(); - - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const googleSitesIndexingStatuses: ConnectorIndexingStatus< - GoogleSitesConfig, - {} - >[] = - connectorIndexingStatuses?.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "google_sites" - ) ?? []; - - return ( - <> - {popup} - {filesAreUploading && } -
-
- -
- - } - title="Google Sites" - /> - - - For an in-depth guide on how to set up this connector, check out{" "} - - the documentation - - . - - -
- Upload Files - -
- { - const uploadCreateAndTriggerConnector = async () => { - const formData = new FormData(); - - selectedFiles.forEach((file) => { - formData.append("files", file); - }); - - const response = await fetch( - "/api/manage/admin/connector/file/upload", - { method: "POST", body: formData } - ); - const responseJson = await response.json(); - if (!response.ok) { - setPopup({ - message: `Unable to upload files - ${responseJson.detail}`, - type: "error", - }); - return; - } - - const filePaths = responseJson.file_paths as string[]; - const [connectorErrorMsg, connector] = - await createConnector({ - name: `GoogleSitesConnector-${values.base_url}`, - source: "google_sites", - input_type: "load_state", - connector_specific_config: { - base_url: values.base_url, - zip_path: filePaths[0], - }, - refresh_freq: null, - prune_freq: 0, - disabled: false, - }); - if (connectorErrorMsg || !connector) { - setPopup({ - message: `Unable to create connector - ${connectorErrorMsg}`, - type: "error", - }); - return; - } - - const credentialResponse = await linkCredential( - connector.id, - 0, - values.base_url - ); - if (!credentialResponse.ok) { - const credentialResponseJson = - await credentialResponse.json(); - setPopup({ - message: `Unable to link connector to credential - ${credentialResponseJson.detail}`, - type: "error", - }); - return; - } - - const runConnectorErrorMsg = await runConnector( - connector.id, - [0] - ); - if (runConnectorErrorMsg) { - setPopup({ - message: `Unable to run connector - ${runConnectorErrorMsg}`, - type: "error", - }); - return; - } - - mutate("/api/manage/admin/connector/indexing-status"); - setSelectedFiles([]); - formikHelpers.resetForm(); - setPopup({ - type: "success", - message: "Successfully uploaded files!", - }); - }; - - setFilesAreUploading(true); - try { - await uploadCreateAndTriggerConnector(); - } catch (e) { - console.log("Failed to index filels: ", e); - } - setFilesAreUploading(false); - }} - > - {({ values, isSubmitting }) => ( -
- - -

Files:

- -
- -
- - )} -
-
-
-
- -

- Existing Google Site Connectors -

- {isConnectorIndexingStatusesLoading ? ( - - ) : connectorIndexingStatusesError || !connectorIndexingStatuses ? ( -
Error loading indexing history
- ) : googleSitesIndexingStatuses.length > 0 ? ( - - connectorIndexingStatuses={googleSitesIndexingStatuses} - specialColumns={[ - { - header: "Base URL", - key: "base_url", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return ( - - {connectorConfig.base_url} - - ); - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> - ) : ( -

No indexed Google Sites found

- )} -
- - ); -} diff --git a/web/src/app/admin/connectors/google-storage/page.tsx b/web/src/app/admin/connectors/google-storage/page.tsx deleted file mode 100644 index a836df21f6f..00000000000 --- a/web/src/app/admin/connectors/google-storage/page.tsx +++ /dev/null @@ -1,257 +0,0 @@ -"use client"; - -import { AdminPageTitle } from "@/components/admin/Title"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { GoogleStorageIcon, TrashIcon } from "@/components/icons/icons"; -import { LoadingAnimation } from "@/components/Loading"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { usePublicCredentials } from "@/lib/hooks"; -import { ConnectorIndexingStatus, Credential } from "@/lib/types"; - -import { GCSConfig, GCSCredentialJson } from "@/lib/types"; - -import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; -import { useState } from "react"; - -const GCSMain = () => { - const { popup, setPopup } = usePopup(); - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const gcsConnectorIndexingStatuses: ConnectorIndexingStatus< - GCSConfig, - GCSCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "google_cloud_storage" - ); - - const gcsCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.project_id - ); - - return ( - <> - {popup} - - Step 1: Provide your GCS access info - - {gcsCredential ? ( - <> -
-

Existing GCS Access Key ID:

-

- {gcsCredential.credential_json.access_key_id} -

- {", "} -

Secret Access Key:

-

- {gcsCredential.credential_json.secret_access_key} -

{" "} - -
- - ) : ( - <> - -
    -
-   • Provide your GCS Project ID, Client Email, and Private Key for authentication.
-   • These credentials will be used to access your GCS buckets.
-
- - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - secret_access_key: Yup.string().required( - "Client Email is required" - ), - access_key_id: Yup.string().required("Private Key is required"), - })} - initialValues={{ - secret_access_key: "", - access_key_id: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which GCS bucket do you want to make searchable? - - - {gcsConnectorIndexingStatuses.length > 0 && ( - <> - - GCS indexing status - - - The latest changes are fetched every 10 minutes. - -
- - includeName={true} - connectorIndexingStatuses={gcsConnectorIndexingStatuses} - liveCredential={gcsCredential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (gcsCredential) { - await linkCredential(connectorId, gcsCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {gcsCredential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your GCS bucket. - - - nameBuilder={(values) => `GCSConnector-${values.bucket_name}`} - ccPairNameBuilder={(values) => - `GCSConnector-${values.bucket_name}` - } - source="google_cloud_storage" - inputType="poll" - formBodyBuilder={(values) => ( -
- - -
- )} - validationSchema={Yup.object().shape({ - bucket_type: Yup.string() - .oneOf(["google_cloud_storage"]) - .required("Bucket type must be google_cloud_storage"), - bucket_name: Yup.string().required( - "Please enter the name of the GCS bucket to index, e.g. my-gcs-bucket" - ), - prefix: Yup.string().default(""), - })} - initialValues={{ - bucket_type: "google_cloud_storage", - bucket_name: "", - prefix: "", - }} - refreshFreq={60 * 60 * 24} // 1 day - credentialId={gcsCredential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- } - title="Google Cloud Storage" - /> - -
- ); -} diff --git a/web/src/app/admin/connectors/guru/page.tsx b/web/src/app/admin/connectors/guru/page.tsx deleted file mode 100644 index 094bbe7c75b..00000000000 --- a/web/src/app/admin/connectors/guru/page.tsx +++ /dev/null @@ -1,244 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { GuruIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - GuruConfig, - GuruCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - isValidating: isCredentialsValidating, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const guruConnectorIndexingStatuses: ConnectorIndexingStatus< - GuruConfig, - GuruCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "guru" - ); - const guruCredential: Credential | undefined = - credentialsData.find((credential) => credential.credential_json?.guru_user); - - return ( - <> - {popup} - - This connector allows you to sync all your Guru Cards into Danswer. - - - - Step 1: Provide your Credentials - - - {guruCredential ? ( - <> -
- Existing Access Token: - - {guruCredential.credential_json?.guru_user_token} - - -
- - ) : ( - <> - - To use the Guru connector, first follow the guide{" "} - - here - {" "} - to generate a User Token. - - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - guru_user: Yup.string().required( - "Please enter your Guru username" - ), - guru_user_token: Yup.string().required( - "Please enter your Guru access token" - ), - })} - initialValues={{ - guru_user: "", - guru_user_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Start indexing! - - {guruCredential ? ( - !guruConnectorIndexingStatuses.length ? ( - <> - - Click the button below to start indexing! We will pull the latest - features, components, and products from Guru every 10{" "} - minutes. - -
- - nameBuilder={() => "GuruConnector"} - ccPairNameBuilder={() => "Guru"} - source="guru" - inputType="poll" - formBody={null} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={guruCredential.id} - /> -
- - ) : ( - <> - - Guru connector is set up! We are pulling the latest cards from Guru - every 10 minutes. - - connectorIndexingStatuses={guruConnectorIndexingStatuses} - liveCredential={guruCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.guru_user}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (guruCredential) { - await linkCredential(connectorId, guruCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> - - ) - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then start indexing all your Guru cards. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Guru" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/hubspot/page.tsx b/web/src/app/admin/connectors/hubspot/page.tsx deleted file mode 100644 index 199c027b6e7..00000000000 --- a/web/src/app/admin/connectors/hubspot/page.tsx +++ /dev/null @@ -1,232 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { HubSpotIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - HubSpotConfig, - HubSpotCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - isValidating: isCredentialsValidating, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const hubSpotConnectorIndexingStatuses: ConnectorIndexingStatus< - HubSpotConfig, - HubSpotCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "hubspot" - ); - const hubSpotCredential: Credential = - credentialsData.filter( - (credential) => credential.credential_json?.hubspot_access_token - )[0]; - - return ( - <> - {popup} - - This connector allows you to sync all your HubSpot Tickets into Danswer. - - - - Step 1: Provide your Credentials - - - {hubSpotCredential ? ( - <> -
- Existing Access Token: - - {hubSpotCredential.credential_json?.hubspot_access_token} - - -
- - ) : ( - <> - - To use the HubSpot connector, provide the HubSpot Access Token. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - hubspot_access_token: Yup.string().required( - "Please enter your HubSpot Access Token" - ), - })} - initialValues={{ - hubspot_access_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Start indexing! - - {hubSpotCredential ? ( - !hubSpotConnectorIndexingStatuses.length ? ( - <> - - Click the button below to start indexing! We will pull the latest - tickets from HubSpot every 10 minutes. - -
- - nameBuilder={() => "HubSpotConnector"} - ccPairNameBuilder={() => "HubSpotConnector"} - source="hubspot" - inputType="poll" - formBody={null} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={hubSpotCredential.id} - /> -
- - ) : ( - <> - - HubSpot connector is set up! We are pulling the latest tickets from - HubSpot every 10 minutes. - - connectorIndexingStatuses={hubSpotConnectorIndexingStatuses} - liveCredential={hubSpotCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.hubspot_access_token}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (hubSpotCredential) { - await linkCredential(connectorId, hubSpotCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> - - ) - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then start indexing all your HubSpot tickets. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="HubSpot" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/jira/page.tsx b/web/src/app/admin/connectors/jira/page.tsx deleted file mode 100644 index f960348e6da..00000000000 --- a/web/src/app/admin/connectors/jira/page.tsx +++ /dev/null @@ -1,374 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { JiraIcon, TrashIcon } from "@/components/icons/icons"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - JiraConfig, - JiraCredentialJson, - JiraServerCredentialJson, - ConnectorIndexingStatus, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Divider, Text, Title } from "@tremor/react"; - -// Copied from the `extract_jira_project` function -const extractJiraProject = (url: string): string | null => { - const parsedUrl = new URL(url); - const splitPath = parsedUrl.pathname.split("/"); - const projectPos = splitPath.indexOf("projects"); - if (projectPos !== -1 && splitPath.length > projectPos + 1) { - const jiraProject = splitPath[projectPos + 1]; - return jiraProject; - } - return null; -}; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - isValidating: isCredentialsValidating, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const jiraConnectorIndexingStatuses: ConnectorIndexingStatus< - JiraConfig, - JiraCredentialJson | JiraServerCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "jira" - ); - const jiraCredential = credentialsData.filter( - (credential) => credential.credential_json?.jira_api_token - )[0]; - - return ( - <> - {popup} - - Step 1: Provide your Credentials - - - {jiraCredential ? ( - <> -
-

Existing Access Token:

-

- {jiraCredential.credential_json?.jira_api_token} -

- -
- - ) : ( - <> - - To use the Jira connector, first follow the guide{" "} - - here - {" "} - to generate an Access Token (for cloud) or Personal Access Token - (for server). Submit only one form. - - Cloud - - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - jira_user_email: Yup.string().required( - "Please enter your username on Jira" - ), - jira_api_token: Yup.string().required( - "Please enter your Jira access token" - ), - })} - initialValues={{ - jira_user_email: "", - jira_api_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - Server - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - jira_api_token: Yup.string().required( - "Please enter your Jira personal access token" - ), - })} - initialValues={{ - jira_api_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - {/* TODO: make this periodic */} - - Step 2: Which spaces do you want to make searchable? - - {jiraCredential ? ( - <> - {" "} - - Specify any link to a Jira page below and click "Index" to - Index. Based on the provided link, we will index the ENTIRE PROJECT, - not just the specified page. For example, entering{" "} - - https://danswer.atlassian.net/jira/software/projects/DAN/boards/1 - {" "} - and clicking the Index button will index the whole DAN Jira - project. - - {jiraConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest pages and comments from each space listed - below every 10 minutes. - -
- - connectorIndexingStatuses={jiraConnectorIndexingStatuses} - liveCredential={jiraCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.jira_api_token}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (jiraCredential) { - await linkCredential(connectorId, jiraCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Url", - key: "url", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return ( - - {connectorConfig.jira_project_url} - - ); - }, - }, - { - header: "Disable comments from users", - key: "comment_email_blacklist", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return connectorConfig.comment_email_blacklist && - connectorConfig.comment_email_blacklist.length > 0 - ? connectorConfig.comment_email_blacklist.join(", ") - : ""; - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - - )} - -

Add a New Project

- - nameBuilder={(values) => - `JiraConnector-${values.jira_project_url}` - } - ccPairNameBuilder={(values) => - extractJiraProject(values.jira_project_url) - } - credentialId={jiraCredential.id} - source="jira" - inputType="poll" - formBody={ - <> - - - } - formBodyBuilder={(values) => { - return ( - <> - - {TextArrayFieldBuilder({ - name: "comment_email_blacklist", - label: "Disable comments from users:", - subtext: ` - This is generally useful to ignore certain bots. Add user emails which comments should NOT be indexed.`, - })(values)} - - ); - }} - validationSchema={Yup.object().shape({ - jira_project_url: Yup.string().required( - "Please enter any link to your jira project e.g. https://danswer.atlassian.net/jira/software/projects/DAN/boards/1" - ), - comment_email_blacklist: Yup.array() - .of(Yup.string().required("Emails names must be strings")) - .required(), - })} - initialValues={{ - jira_project_url: "", - comment_email_blacklist: [], - }} - refreshFreq={10 * 60} // 10 minutes - /> -
- - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then specify which Jira projects you want to make - searchable. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Jira" /> - -
-
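// Editor's sketch (not part of the deleted file above): minimal usage of the
// `extractJiraProject` helper defined in this page. It returns the path segment
// that follows "projects" in a Jira URL, or null when that segment is absent;
// the page uses the result as the connector/cc-pair name. The example URL is
// the one quoted in the page's own copy.
const exampleBoardUrl =
  "https://danswer.atlassian.net/jira/software/projects/DAN/boards/1";
console.log(extractJiraProject(exampleBoardUrl)); // "DAN"
console.log(extractJiraProject("https://danswer.atlassian.net/")); // null - no "projects" segment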
- ); -} diff --git a/web/src/app/admin/connectors/linear/page.tsx b/web/src/app/admin/connectors/linear/page.tsx deleted file mode 100644 index 6af018729dc..00000000000 --- a/web/src/app/admin/connectors/linear/page.tsx +++ /dev/null @@ -1,236 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { LinearIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - LinearCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const linearConnectorIndexingStatuses: ConnectorIndexingStatus< - {}, - LinearCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "linear" - ); - const linearCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.linear_api_key - ); - - return ( - <> - {popup} - - Step 1: Provide your Credentials - - - {linearCredential ? ( - <> -
- Existing API Key: - - {linearCredential.credential_json?.linear_api_key} - - -
- - ) : ( - <> - - To use the Linear connector, first follow the guide{" "} - - here - {" "} - to generate an API Key. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - linear_api_key: Yup.string().required( - "Please enter your Linear API Key!" - ), - })} - initialValues={{ - linear_api_key: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Start indexing - - {linearCredential ? ( - <> - {linearConnectorIndexingStatuses.length > 0 ? ( - <> - - We pull the latest issues and comments every{" "} - 10 minutes. - -
- - connectorIndexingStatuses={linearConnectorIndexingStatuses} - liveCredential={linearCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.linear_api_key}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (linearCredential) { - await linkCredential(connectorId, linearCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - ) : ( - -

Create Connector

-

- Press connect below to start the connection to Linear. We pull the latest issues and comments every 10 minutes.

- - nameBuilder={() => "LinearConnector"} - ccPairNameBuilder={() => "Linear"} - source="linear" - inputType="poll" - formBody={<>} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={linearCredential.id} - /> -
- )} - - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then start indexing Linear. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Linear" /> - -
-
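// Editor's sketch (not part of the deleted file above): every connector page removed
// in this diff follows the same skeleton, restated here with the Linear types as a
// stand-in. `selectLinearSetup` is an illustrative name only; the real pages inline
// this logic inside their `Main` components, using the types already imported from
// "@/lib/types".
function selectLinearSetup(
  statuses: ConnectorIndexingStatus<{}, LinearCredentialJson>[],
  credentials: Credential<LinearCredentialJson>[]
) {
  // 1. keep only the indexing statuses whose connector source matches this page
  const linearStatuses = statuses.filter(
    (status) => status.connector.source === "linear"
  );
  // 2. pick the public credential that carries this connector's secret
  const linearCredential = credentials.find(
    (credential) => credential.credential_json?.linear_api_key
  );
  // 3. the page renders CredentialForm (step 1) while no credential exists, and
  //    ConnectorForm / ConnectorsTable (step 2) once it does
  return { linearStatuses, linearCredential };
}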
- ); -} diff --git a/web/src/app/admin/connectors/loopio/page.tsx b/web/src/app/admin/connectors/loopio/page.tsx deleted file mode 100644 index 920d15b8246..00000000000 --- a/web/src/app/admin/connectors/loopio/page.tsx +++ /dev/null @@ -1,263 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { LoopioIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - Credential, - ConnectorIndexingStatus, - LoopioConfig, - LoopioCredentialJson, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - isValidating: isCredentialsValidating, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const loopioConnectorIndexingStatuses: ConnectorIndexingStatus< - LoopioConfig, - LoopioCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "loopio" - ); - const loopioCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.loopio_client_id - ); - - return ( - <> - {popup} -

- This connector allows you to sync your Loopio Library Entries into Danswer

- -

- Step 1: Provide your API Access info -

- - {loopioCredential ? ( - <> -
-

Existing App Key Secret: {loopioCredential.credential_json?.loopio_client_token}
- - ) : ( - <> -
- - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - loopio_subdomain: Yup.string().required( - "Please enter your Loopio Account subdomain" - ), - loopio_client_id: Yup.string().required( - "Please enter your Loopio App Key ID" - ), - loopio_client_token: Yup.string().required( - "Please enter your Loopio App Key Secret" - ), - })} - initialValues={{ - loopio_subdomain: "", - loopio_client_id: "", - loopio_client_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> -
- - )} - -

- Step 2: Which Stack do you want to make searchable? -

-

- Leave this blank if you want to index all stacks. -

- - {loopioConnectorIndexingStatuses.length > 0 && ( - <> -

- We pull the latest library entries every 24 hours. -

-
- - connectorIndexingStatuses={loopioConnectorIndexingStatuses} - liveCredential={loopioCredential} - getCredential={(credential) => - credential.credential_json.loopio_client_id - } - specialColumns={[ - { - header: "Stack", - key: "loopio_stack_name", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .loopio_stack_name || "All stacks", - }, - ]} - includeName={true} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (loopioCredential) { - await linkCredential(connectorId, loopioCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - )} - - {loopioCredential ? ( - <> -
-

Create a new Loopio Connector

- - nameBuilder={(values) => - values?.loopio_stack_name - ? `LoopioConnector-${values.loopio_stack_name}-Stack` - : `LoopioConnector-AllStacks` - } - source="loopio" - inputType="poll" - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - loopio_stack_name: Yup.string(), - })} - initialValues={{ - loopio_stack_name: "", - }} - refreshFreq={60 * 60 * 24} // 24 hours - credentialId={loopioCredential.id} - /> -
- - ) : ( -

- Please provide your API Access Info in Step 1 first! Once done with - that, you can start indexing your Loopio library. -

- )} - - ); -}; - -export default function Page() { - return ( -
-
- -
-
- -

Loopio

-
-
-
- ); -} diff --git a/web/src/app/admin/connectors/mediawiki/page.tsx b/web/src/app/admin/connectors/mediawiki/page.tsx deleted file mode 100644 index e0c17a6e72d..00000000000 --- a/web/src/app/admin/connectors/mediawiki/page.tsx +++ /dev/null @@ -1,219 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { MediaWikiIcon, TrashIcon } from "@/components/icons/icons"; -import { - TextArrayField, - TextArrayFieldBuilder, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - MediaWikiCredentialJson, - MediaWikiConfig, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const mediawikiConnectorIndexingStatuses: ConnectorIndexingStatus< - MediaWikiConfig, - MediaWikiCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "mediawiki" - ); - const mediawikiCredential: Credential | undefined = - credentialsData.find((credential) => true); - - return ( - <> - {popup} - {mediawikiConnectorIndexingStatuses.length > 0 && ( - <> - - MediaWiki indexing status - - - The latest page, chapter, book and shelf changes are fetched every - 10 minutes. - -
- - connectorIndexingStatuses={mediawikiConnectorIndexingStatuses} - liveCredential={mediawikiCredential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (mediawikiCredential) { - await linkCredential(connectorId, mediawikiCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {mediawikiCredential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your MediaWiki - instance. - - - nameBuilder={(values) => - `MediaWikiConnector-${values.connector_name}` - } - ccPairNameBuilder={(values) => - `MediaWikiConnector-${values.connector_name}` - } - source="mediawiki" - inputType="poll" - formBodyBuilder={(values) => ( -
- - - - {TextArrayFieldBuilder({ - name: "pages", - label: "Pages to index:", - subtext: - "Specify 0 or more names of pages to index. Only specify the name of the page, not its url.", - })(values)} - {TextArrayFieldBuilder({ - name: "categories", - label: "Categories to index:", - subtext: - "Specify 0 or more names of categories to index. For most MediaWiki sites, these are pages" + - " with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only" + - " specify the name of the category, not its url.", - })(values)} - -
- )} - validationSchema={Yup.object().shape({ - connector_name: Yup.string().required( - "Please enter a name for your MediaWiki connector." - ), - hostname: Yup.string().required( - "Please enter the base URL for your MediaWiki site" - ), - language_code: Yup.string().default("en"), - categories: Yup.array().of( - Yup.string().required( - "Please enter categories to index from your MediaWiki site" - ) - ), - pages: Yup.array().of( - Yup.string().required( - "Please enter pages to index from your MediaWiki site" - ) - ), - recurse_depth: Yup.number().required( - "Please enter the recursion depth for your MediaWiki site." - ), - })} - initialValues={{ - connector_name: "", - hostname: "", - language_code: "en", - categories: [], - pages: [], - recurse_depth: 0, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={mediawikiCredential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="MediaWiki" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/notion/page.tsx b/web/src/app/admin/connectors/notion/page.tsx deleted file mode 100644 index aa205dce0ee..00000000000 --- a/web/src/app/admin/connectors/notion/page.tsx +++ /dev/null @@ -1,272 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { NotionIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - NotionCredentialJson, - NotionConfig, - Credential, - ConnectorIndexingStatus, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Divider, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const notionConnectorIndexingStatuses: ConnectorIndexingStatus< - NotionConfig, - NotionCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "notion" - ); - const notionCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.notion_integration_token - ); - - return ( - <> - {popup} - - Step 1: Provide your authorization details - - - {notionCredential ? ( - <> -
-

Existing Integration Token: {notionCredential.credential_json?.notion_integration_token}
- - ) : ( - <> - - To get started you'll need to create an internal integration in - Notion for Danswer. Follow the instructions in the  - - Notion Developer Documentation - -   on the Notion website, to create a new integration. Once - you've created an integration, copy the integration secret - token and paste it below. Follow the remaining instructions on the - Notion docs to allow Danswer to read Notion Databases and Pages - using the new integration. - - - - formBody={ - - } - validationSchema={Yup.object().shape({ - notion_integration_token: Yup.string().required( - "Please enter the Notion Integration token for the Danswer integration." - ), - })} - initialValues={{ - notion_integration_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> - - - )} - - - Step 2: Manage Connectors - - {notionConnectorIndexingStatuses.length > 0 && ( - <> - - The latest page updates are fetched from Notion every 10 minutes. - -
- - connectorIndexingStatuses={notionConnectorIndexingStatuses} - specialColumns={[ - { - header: "Root Page ID", - key: "root_page_id", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .root_page_id || "-", - }, - ]} - liveCredential={notionCredential} - getCredential={(credential) => { - return ( -
-

{credential.credential_json.notion_integration_token}

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (notionCredential) { - await linkCredential(connectorId, notionCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - - )} - - {notionCredential && ( - <> - -
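// Editor's sketch (not part of the deleted file above): the onCredentialLink handler
// passed to each ConnectorsTable in this diff follows one pattern: link the page's
// public credential to the chosen connector, then revalidate the SWR cache so the
// table refreshes. `handleCredentialLink` is an illustrative name; the deleted pages
// inline this as an arrow-function prop.
const handleCredentialLink = async (connectorId: number) => {
  if (notionCredential) {
    await linkCredential(connectorId, notionCredential.id);
    mutate("/api/manage/admin/connector/indexing-status");
  }
};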

Create New Connection

-

- Press connect below to start the connection to Notion. -

- - nameBuilder={(values) => - values.root_page_id - ? `NotionConnector-${values.root_page_id}` - : "NotionConnector" - } - ccPairNameBuilder={(values) => - values.root_page_id ? `Notion-${values.root_page_id}` : "Notion" - } - source="notion" - inputType="poll" - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - root_page_id: Yup.string(), - })} - initialValues={{ - root_page_id: "", - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={notionCredential.id} - /> -
- - )} - - {!notionCredential && ( - <> - - Please provide your integration details in Step 1 first! Once done - with that, you'll be able to start the connection then see - indexing status. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Notion" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/oracle-storage/page.tsx b/web/src/app/admin/connectors/oracle-storage/page.tsx deleted file mode 100644 index 34847a4b9a1..00000000000 --- a/web/src/app/admin/connectors/oracle-storage/page.tsx +++ /dev/null @@ -1,272 +0,0 @@ -"use client"; - -import { AdminPageTitle } from "@/components/admin/Title"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { OCIStorageIcon, TrashIcon } from "@/components/icons/icons"; -import { LoadingAnimation } from "@/components/Loading"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { usePublicCredentials } from "@/lib/hooks"; - -import { - ConnectorIndexingStatus, - Credential, - OCIConfig, - OCICredentialJson, - R2Config, - R2CredentialJson, -} from "@/lib/types"; -import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; -import { useState } from "react"; - -const OCIMain = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const ociConnectorIndexingStatuses: ConnectorIndexingStatus< - OCIConfig, - OCICredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "oci_storage" - ); - - const ociCredential: Credential | undefined = - credentialsData.find((credential) => credential.credential_json?.namespace); - - return ( - <> - {popup} - - Step 1: Provide your access info - - {ociCredential ? ( - <> - {" "} -
-

Existing OCI Access Key ID: {ociCredential.credential_json.access_key_id}, Namespace: {ociCredential.credential_json.namespace}
{" "} - -
- - ) : ( - <> - -
    -
  • Provide your OCI Access Key ID, Secret Access Key, Namespace, and Region for authentication.
  • These credentials will be used to access your OCI buckets.
-
- - - formBody={ - <> - - - - - - } - validationSchema={Yup.object().shape({ - access_key_id: Yup.string().required( - "OCI Access Key ID is required" - ), - secret_access_key: Yup.string().required( - "OCI Secret Access Key is required" - ), - namespace: Yup.string().required("Namespace is required"), - region: Yup.string().required("Region is required"), - })} - initialValues={{ - access_key_id: "", - secret_access_key: "", - namespace: "", - region: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which OCI bucket do you want to make searchable? - - - {ociConnectorIndexingStatuses.length > 0 && ( - <> - - OCI indexing status - - - The latest changes are fetched every 10 minutes. - -
- - includeName={true} - connectorIndexingStatuses={ociConnectorIndexingStatuses} - liveCredential={ociCredential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (ociCredential) { - await linkCredential(connectorId, ociCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {ociCredential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your OCI bucket. - - - nameBuilder={(values) => `OCIConnector-${values.bucket_name}`} - ccPairNameBuilder={(values) => - `OCIConnector-${values.bucket_name}` - } - source="oci_storage" - inputType="poll" - formBodyBuilder={(values) => ( -
- - -
- )} - validationSchema={Yup.object().shape({ - bucket_type: Yup.string() - .oneOf(["oci_storage"]) - .required("Bucket type must be oci_storage"), - bucket_name: Yup.string().required( - "Please enter the name of the OCI bucket to index, e.g. my-test-bucket" - ), - prefix: Yup.string().default(""), - })} - initialValues={{ - bucket_type: "oci_storage", - bucket_name: "", - prefix: "", - }} - refreshFreq={60 * 60 * 24} // 1 day - credentialId={ociCredential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- } - title="Oracle Cloud Infrastructure" - /> - -
- ); -} diff --git a/web/src/app/admin/connectors/productboard/page.tsx b/web/src/app/admin/connectors/productboard/page.tsx deleted file mode 100644 index 1694baa8ccb..00000000000 --- a/web/src/app/admin/connectors/productboard/page.tsx +++ /dev/null @@ -1,254 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { ProductboardIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - ProductboardConfig, - ConnectorIndexingStatus, - ProductboardCredentialJson, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - isValidating: isCredentialsValidating, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const productboardConnectorIndexingStatuses: ConnectorIndexingStatus< - ProductboardConfig, - ProductboardCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "productboard" - ); - const productboardCredential: - | Credential - | undefined = credentialsData.find( - (credential) => credential.credential_json?.productboard_access_token - ); - - return ( - <> - {popup} - - This connector allows you to sync all your Features,{" "} - Components, Products, and Objectives from - Productboard into Danswer. At this time, the Productboard APIs does not - support pulling in Releases or Notes. - - - - Step 1: Provide your Credentials - - - {productboardCredential ? ( - <> -
- Existing Access Token: - - { - productboardCredential.credential_json - ?.productboard_access_token - } - - -
- - ) : ( - <> - - To use the Productboard connector, first follow the guide{" "} - - here - {" "} - to generate an Access Token. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - productboard_access_token: Yup.string().required( - "Please enter your Productboard access token" - ), - })} - initialValues={{ - productboard_access_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Start indexing! - - {productboardCredential ? ( - !productboardConnectorIndexingStatuses.length ? ( - <> - - Click the button below to start indexing! We will pull the latest - features, components, and products from Productboard every{" "} - 10 minutes. - -
- - nameBuilder={() => "ProductboardConnector"} - ccPairNameBuilder={() => "Productboard"} - source="productboard" - inputType="poll" - formBody={null} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={productboardCredential.id} - /> -
- - ) : ( - <> - - Productboard connector is setup! We are pulling the latest - features, components, and products from Productboard every{" "} - 10 minutes. - - - connectorIndexingStatuses={productboardConnectorIndexingStatuses} - liveCredential={productboardCredential} - getCredential={(credential) => { - return ( -
-

- {credential.credential_json.productboard_access_token} -

-
- ); - }} - onCredentialLink={async (connectorId) => { - if (productboardCredential) { - await linkCredential(connectorId, productboardCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> - - ) - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then start indexing all your Productboard features, - components, and products. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Productboard" - /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/r2/page.tsx b/web/src/app/admin/connectors/r2/page.tsx deleted file mode 100644 index 372660acc4f..00000000000 --- a/web/src/app/admin/connectors/r2/page.tsx +++ /dev/null @@ -1,265 +0,0 @@ -"use client"; - -import { AdminPageTitle } from "@/components/admin/Title"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { R2Icon, S3Icon, TrashIcon } from "@/components/icons/icons"; -import { LoadingAnimation } from "@/components/Loading"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { usePublicCredentials } from "@/lib/hooks"; -import { - ConnectorIndexingStatus, - Credential, - R2Config, - R2CredentialJson, -} from "@/lib/types"; -import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; -import { useState } from "react"; - -const R2Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const r2ConnectorIndexingStatuses: ConnectorIndexingStatus< - R2Config, - R2CredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "r2" - ); - - const r2Credential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.account_id - ); - - return ( - <> - {popup} - - Step 1: Provide your access info - - {r2Credential ? ( - <> - {" "} -
-

Existing R2 Access Key ID: {r2Credential.credential_json.r2_access_key_id}, Account ID: {r2Credential.credential_json.account_id}
{" "} - -
- - ) : ( - <> - -
    -
  • Provide your R2 Access Key ID, Secret Access Key, and Account ID for authentication.
  • These credentials will be used to access your R2 buckets.
-
- - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - r2_access_key_id: Yup.string().required( - "R2 Access Key ID is required" - ), - r2_secret_access_key: Yup.string().required( - "R2 Secret Access Key is required" - ), - account_id: Yup.string().required("Account ID is required"), - })} - initialValues={{ - r2_access_key_id: "", - r2_secret_access_key: "", - account_id: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which R2 bucket do you want to make searchable? - - - {r2ConnectorIndexingStatuses.length > 0 && ( - <> - - R2 indexing status - - - The latest changes are fetched every 10 minutes. - -
- - includeName={true} - connectorIndexingStatuses={r2ConnectorIndexingStatuses} - liveCredential={r2Credential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (r2Credential) { - await linkCredential(connectorId, r2Credential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {r2Credential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your R2 bucket. - - - nameBuilder={(values) => `R2Connector-${values.bucket_name}`} - ccPairNameBuilder={(values) => - `R2Connector-${values.bucket_name}` - } - source="r2" - inputType="poll" - formBodyBuilder={(values) => ( -
- - -
- )} - validationSchema={Yup.object().shape({ - bucket_type: Yup.string() - .oneOf(["r2"]) - .required("Bucket type must be r2"), - bucket_name: Yup.string().required( - "Please enter the name of the r2 bucket to index, e.g. my-test-bucket" - ), - prefix: Yup.string().default(""), - })} - initialValues={{ - bucket_type: "r2", - bucket_name: "", - prefix: "", - }} - refreshFreq={60 * 60 * 24} // 1 day - credentialId={r2Credential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - const [selectedStorage, setSelectedStorage] = useState("s3"); - - return ( -
-
- -
- } title="R2 Storage" /> - -
- ); -} diff --git a/web/src/app/admin/connectors/request-tracker/page.tsx b/web/src/app/admin/connectors/request-tracker/page.tsx deleted file mode 100644 index 147dd1ae2e6..00000000000 --- a/web/src/app/admin/connectors/request-tracker/page.tsx +++ /dev/null @@ -1,256 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, RequestTrackerIcon } from "@/components/icons/icons"; // Make sure you have a Document360 icon -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - RequestTrackerConfig, - RequestTrackerCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; // Modify or create these types as required -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const requestTrackerConnectorIndexingStatuses: ConnectorIndexingStatus< - RequestTrackerConfig, - RequestTrackerCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "requesttracker" - ); - - const requestTrackerCredential: - | Credential - | undefined = credentialsData.find( - (credential) => credential.credential_json?.requesttracker_username - ); - - return ( - <> - - Step 1: Provide Request Tracker credentials - - {requestTrackerCredential ? ( - <> -
- Existing Request Tracker username: - - {requestTrackerCredential.credential_json.requesttracker_username} - - -
- - ) : ( - <> - - To use the Request Tracker connector, provide a Request Tracker - username, password, and base url. - - - This connector currently supports{" "} - - Request Tracker REST API 1.0 - - ,{" "} - not the latest REST API 2.0 introduced in Request Tracker 5.0 - . - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - requesttracker_username: Yup.string().required( - "Please enter your Request Tracker username" - ), - requesttracker_password: Yup.string().required( - "Please enter your Request Tracker password" - ), - requesttracker_base_url: Yup.string() - .url() - .required( - "Please enter the base url of your RT installation" - ), - })} - initialValues={{ - requesttracker_username: "", - requesttracker_password: "", - requesttracker_base_url: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Manage Request Tracker Connector - - - {requestTrackerConnectorIndexingStatuses.length > 0 && ( - <> - - We index the most recently updated tickets from each Request Tracker - instance listed below regularly. - - - The initial poll at this time retrieves tickets updated in the past - hour. All subsequent polls execute every ten minutes. This should be - configurable in the future. - -
- - connectorIndexingStatuses={ - requestTrackerConnectorIndexingStatuses - } - liveCredential={requestTrackerCredential} - getCredential={(credential) => - credential.credential_json.requesttracker_base_url - } - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (requestTrackerCredential) { - await linkCredential( - connectorId, - requestTrackerCredential.id - ); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - )} - - {requestTrackerCredential && - requestTrackerConnectorIndexingStatuses.length === 0 ? ( - - - nameBuilder={(values) => - `RequestTracker-${requestTrackerCredential.credential_json.requesttracker_base_url}` - } - ccPairNameBuilder={(values) => - `Request Tracker ${requestTrackerCredential.credential_json.requesttracker_base_url}` - } - source="requesttracker" - inputType="poll" - validationSchema={Yup.object().shape({})} - formBody={<>} - initialValues={{}} - credentialId={requestTrackerCredential.id} - refreshFreq={10 * 60} // 10 minutes - /> - - ) : ( - <> - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } - title="Request Tracker" - /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/s3/page.tsx b/web/src/app/admin/connectors/s3/page.tsx deleted file mode 100644 index 81064a70bf1..00000000000 --- a/web/src/app/admin/connectors/s3/page.tsx +++ /dev/null @@ -1,258 +0,0 @@ -"use client"; - -import { AdminPageTitle } from "@/components/admin/Title"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { S3Icon, TrashIcon } from "@/components/icons/icons"; -import { LoadingAnimation } from "@/components/Loading"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { usePublicCredentials } from "@/lib/hooks"; -import { - ConnectorIndexingStatus, - Credential, - S3Config, - S3CredentialJson, -} from "@/lib/types"; -import { Card, Text, Title } from "@tremor/react"; -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; -import { useState } from "react"; - -const S3Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const s3ConnectorIndexingStatuses: ConnectorIndexingStatus< - S3Config, - S3CredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "s3" - ); - - const s3Credential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.aws_access_key_id - ); - - return ( - <> - {popup} - - Step 1: Provide your access info - - {s3Credential ? ( - <> - {" "} -
-

Existing AWS Access Key ID: {s3Credential.credential_json.aws_access_key_id}
- - ) : ( - <> - -
    -
  • If AWS Access Key ID and AWS Secret Access Key are provided, they will be used for authenticating the connector.
  • Otherwise, the Profile Name will be used (if provided).
  • If no credentials are provided, then the connector will try to authenticate with any default AWS credentials available.
-
- - - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - aws_access_key_id: Yup.string().default(""), - aws_secret_access_key: Yup.string().default(""), - })} - initialValues={{ - aws_access_key_id: "", - aws_secret_access_key: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which S3 bucket do you want to make searchable? - - - {s3ConnectorIndexingStatuses.length > 0 && ( - <> - - S3 indexing status - - - The latest changes are fetched every 10 minutes. - -
- - includeName={true} - connectorIndexingStatuses={s3ConnectorIndexingStatuses} - liveCredential={s3Credential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (s3Credential) { - await linkCredential(connectorId, s3Credential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {s3Credential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your S3 bucket. - - - nameBuilder={(values) => `S3Connector-${values.bucket_name}`} - ccPairNameBuilder={(values) => - `S3Connector-${values.bucket_name}` - } - source="s3" - inputType="poll" - formBodyBuilder={(values) => ( -
- - -
- )} - validationSchema={Yup.object().shape({ - bucket_type: Yup.string() - .oneOf(["s3"]) - .required("Bucket type must be s3"), - bucket_name: Yup.string().required( - "Please enter the name of the s3 bucket to index, e.g. my-test-bucket" - ), - prefix: Yup.string().default(""), - })} - initialValues={{ - bucket_type: "s3", - bucket_name: "", - prefix: "", - }} - refreshFreq={60 * 60 * 24} // 1 day - credentialId={s3Credential.id} - /> -
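// Editor's sketch (not part of the deleted file above): how the bucket-connector
// validation schema defined just above behaves. `s3ConfigSchema` restates the same
// shape under an illustrative name so it can be exercised on its own.
const s3ConfigSchema = Yup.object().shape({
  bucket_type: Yup.string().oneOf(["s3"]).required("Bucket type must be s3"),
  bucket_name: Yup.string().required(
    "Please enter the name of the s3 bucket to index, e.g. my-test-bucket"
  ),
  prefix: Yup.string().default(""),
});
// Valid: matches what the form submits (prefix may be omitted thanks to the default).
s3ConfigSchema.isValid({ bucket_type: "s3", bucket_name: "my-test-bucket" }).then(console.log); // true
// Invalid: bucket_name is required, so validation fails.
s3ConfigSchema.isValid({ bucket_type: "s3", prefix: "" }).then(console.log); // false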
- - )} - - ); -}; - -export default function Page() { - const [selectedStorage, setSelectedStorage] = useState("s3"); - - return ( -
-
- -
- } title="S3 Storage" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/salesforce/page.tsx b/web/src/app/admin/connectors/salesforce/page.tsx deleted file mode 100644 index 8771b14f94b..00000000000 --- a/web/src/app/admin/connectors/salesforce/page.tsx +++ /dev/null @@ -1,290 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, SalesforceIcon } from "@/components/icons/icons"; // Make sure you have a Document360 icon -import { errorHandlingFetcher as fetcher } from "@/lib/fetcher"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - SalesforceConfig, - SalesforceCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; // Modify or create these types as required -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: isConnectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - fetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: isCredentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { - return
Failed to load connectors
; - } - - if (isCredentialsError || !credentialsData) { - return
Failed to load credentials
; - } - - const SalesforceConnectorIndexingStatuses: ConnectorIndexingStatus< - SalesforceConfig, - SalesforceCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "salesforce" - ); - - const SalesforceCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.sf_username - ); - - return ( - <> - - The Salesforce connector allows you to index and search through your - Salesforce data. Once setup, all indicated Salesforce data will be will - be queryable within Danswer. - - - - Step 1: Provide Salesforce credentials - - {SalesforceCredential ? ( - <> -
- Existing SalesForce Username: - - {SalesforceCredential.credential_json.sf_username} - - -
- - ) : ( - <> - - As a first step, please provide the Salesforce admin account's - username, password, and Salesforce security token. You can follow - the guide{" "} - - here - {" "} - to create get your Salesforce Security Token. - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - sf_username: Yup.string().required( - "Please enter your Salesforce username" - ), - sf_password: Yup.string().required( - "Please enter your Salesforce password" - ), - sf_security_token: Yup.string().required( - "Please enter your Salesforce security token" - ), - })} - initialValues={{ - sf_username: "", - sf_password: "", - sf_security_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Manage Salesforce Connector - - - {SalesforceConnectorIndexingStatuses.length > 0 && ( - <> - - The latest state of your Salesforce objects are fetched every 10 - minutes. - -
- - connectorIndexingStatuses={SalesforceConnectorIndexingStatuses} - liveCredential={SalesforceCredential} - getCredential={(credential) => - credential.credential_json.sf_security_token - } - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (SalesforceCredential) { - await linkCredential(connectorId, SalesforceCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Connectors", - key: "connectors", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.requested_objects}`; - }, - }, - ]} - includeName - /> -
- - )} - - {SalesforceCredential ? ( - - - nameBuilder={(values) => - values.requested_objects && values.requested_objects.length > 0 - ? `Salesforce-${values.requested_objects.join("-")}` - : "Salesforce" - } - ccPairNameBuilder={(values) => - values.requested_objects && values.requested_objects.length > 0 - ? `Salesforce-${values.requested_objects.join("-")}` - : "Salesforce" - } - source="salesforce" - inputType="poll" - // formBody={<>} - formBodyBuilder={TextArrayFieldBuilder({ - name: "requested_objects", - label: "Specify Salesforce objects to organize by:", - subtext: ( - <> -
- Specify the Salesforce object types you want us to index.{" "} -
-
- Click - - {" "} - here{" "} - - for an example of how Danswer uses the objects.
-
- If unsure, don't specify any objects and Danswer will - default to indexing by 'Account'. -
-
- Hint: Use the singular form of the object name (e.g., - 'Opportunity' instead of 'Opportunities'). - - ), - })} - validationSchema={Yup.object().shape({ - requested_objects: Yup.array() - .of( - Yup.string().required( - "Salesforce object names must be strings" - ) - ) - .required(), - })} - initialValues={{ - requested_objects: [], - }} - credentialId={SalesforceCredential.id} - refreshFreq={10 * 60} // 10 minutes - /> -
- ) : ( - - Please provide all Salesforce info in Step 1 first! Once you're - done with that, you can then specify which Salesforce objects you want - to make searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Salesforce" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/sharepoint/page.tsx b/web/src/app/admin/connectors/sharepoint/page.tsx deleted file mode 100644 index bf970415472..00000000000 --- a/web/src/app/admin/connectors/sharepoint/page.tsx +++ /dev/null @@ -1,294 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, SharepointIcon } from "@/components/icons/icons"; // Make sure you have a Document360 icon -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - SharepointConfig, - SharepointCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; // Modify or create these types as required -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const sharepointConnectorIndexingStatuses: ConnectorIndexingStatus< - SharepointConfig, - SharepointCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "sharepoint" - ); - - const sharepointCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.sp_client_id - ); - - return ( - <> - - The Sharepoint connector allows you to index and search through your - Sharepoint files. Once setup, your Word documents, Excel files, - PowerPoint presentations, OneNote notebooks, PDFs, and uploaded files - will be queryable within Danswer. - - - - Step 1: Provide Sharepoint credentials - - {sharepointCredential ? ( - <> -
- Existing Azure AD Client ID: - - {sharepointCredential.credential_json.sp_client_id} - - -
- - ) : ( - <> - - As a first step, please provide Application (client) ID, Directory - (tenant) ID, and Client Secret. You can follow the guide{" "} - - here - {" "} - to create an Azure AD application and obtain these values. - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - sp_client_id: Yup.string().required( - "Please enter your Application (client) ID" - ), - sp_directory_id: Yup.string().required( - "Please enter your Directory (tenant) ID" - ), - sp_client_secret: Yup.string().required( - "Please enter your Client Secret" - ), - })} - initialValues={{ - sp_client_id: "", - sp_directory_id: "", - sp_client_secret: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Manage Sharepoint Connector - - - {sharepointConnectorIndexingStatuses.length > 0 && ( - <> - - The latest state of your Word documents, Excel files, PowerPoint - presentations, OneNote notebooks, PDFs, and uploaded files are - fetched every 10 minutes. - -
- - connectorIndexingStatuses={sharepointConnectorIndexingStatuses} - liveCredential={sharepointCredential} - getCredential={(credential) => - credential.credential_json.sp_directory_id - } - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (sharepointCredential) { - await linkCredential(connectorId, sharepointCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Connectors", - key: "connectors", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.sites}`; - }, - }, - ]} - includeName - /> -
- - )} - - {sharepointCredential ? ( - - - nameBuilder={(values) => - values.sites && values.sites.length > 0 - ? `Sharepoint-${values.sites.join("-")}` - : "Sharepoint" - } - ccPairNameBuilder={(values) => - values.sites && values.sites.length > 0 - ? `Sharepoint-${values.sites.join("-")}` - : "Sharepoint" - } - source="sharepoint" - inputType="poll" - // formBody={<>} - formBodyBuilder={TextArrayFieldBuilder({ - name: "sites", - label: "Sites:", - subtext: ( - <> -
-                • If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
-                • Specifying 'https://danswerai.sharepoint.com/sites/support' for example will only index documents within this site.
-                • Specifying 'https://danswerai.sharepoint.com/sites/support/subfolder' for example will only index documents within this folder.
- - ), - })} - validationSchema={Yup.object().shape({ - sites: Yup.array() - .of(Yup.string().required("Site names must be strings")) - .required(), - })} - initialValues={{ - sites: [], - }} - credentialId={sharepointCredential.id} - refreshFreq={10 * 60} // 10 minutes - /> -
- ) : ( - - Please provide all Azure info in Step 1 first! Once you're done - with that, you can then specify which Sharepoint sites you want to - make searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Sharepoint" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/slab/page.tsx b/web/src/app/admin/connectors/slab/page.tsx deleted file mode 100644 index 11dcd799e46..00000000000 --- a/web/src/app/admin/connectors/slab/page.tsx +++ /dev/null @@ -1,282 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { SlabIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - ConnectorIndexingStatus, - SlabCredentialJson, - SlabConfig, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - isValidating: isCredentialsValidating, - refreshCredentials, - } = usePublicCredentials(); - - if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const slabConnectorIndexingStatuses: ConnectorIndexingStatus< - SlabConfig, - SlabCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "slab" - ); - const slabCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.slab_bot_token - ); - - return ( - <> - {popup} - - Step 1: Provide your Credentials - - - {slabCredential ? ( - <> -
- Existing Slab Bot Token: - - {slabCredential.credential_json?.slab_bot_token} - - -
- - ) : ( - <> - - To use the Slab connector, first follow the guide{" "} - - here - {" "} - to generate a Slab Bot Token. - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - slab_bot_token: Yup.string().required( - "Please enter your Slab Bot Token" - ), - })} - initialValues={{ - slab_bot_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: What's the base URL for your Slab team? - - {slabCredential ? ( - <> - {slabConnectorIndexingStatuses.length > 0 ? ( - <> - - We are pulling the latest documents from{" "} - - { - slabConnectorIndexingStatuses[0].connector - .connector_specific_config.base_url - } - {" "} - every 10 minutes. - - - connectorIndexingStatuses={slabConnectorIndexingStatuses} - liveCredential={slabCredential} - getCredential={(credential) => { - return ( -
-                      {credential.credential_json.slab_bot_token}
- ); - }} - onCredentialLink={async (connectorId) => { - if (slabCredential) { - await linkCredential(connectorId, slabCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Url", - key: "url", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return ( - - {connectorConfig.base_url} - - ); - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> - - ) : ( - <> - - Specify the base URL for your Slab team below. This will look - something like:{" "} - - https://danswer.slab.com/ - - - -

Add a New Space

- - nameBuilder={(values) => `SlabConnector-${values.base_url}`} - ccPairNameBuilder={(values) => values.base_url} - source="slab" - inputType="poll" - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - base_url: Yup.string().required( - "Please enter the base URL for your team e.g. https://danswer.slab.com/" - ), - })} - initialValues={{ - base_url: "", - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={slabCredential.id} - /> -
- - )} - - ) : ( - <> - - Please provide your access token in Step 1 first! Once done with - that, you can then specify the URL for your Slab team and get - started with indexing. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Slab" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/slack/page.tsx b/web/src/app/admin/connectors/slack/page.tsx deleted file mode 100644 index 9352b0d776d..00000000000 --- a/web/src/app/admin/connectors/slack/page.tsx +++ /dev/null @@ -1,296 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { SlackIcon, TrashIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - SlackConfig, - SlackCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, - BooleanFormField, - TextArrayField, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Button, Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const slackConnectorIndexingStatuses: ConnectorIndexingStatus< - SlackConfig, - SlackCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "slack" - ); - const slackCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.slack_bot_token - ); - - return ( - <> - - Step 1: Provide Credentials - - {slackCredential ? ( - <> -
- Existing Slack Bot Token: - - {slackCredential.credential_json.slack_bot_token} - - -
- - ) : ( - <> -

- To use the Slack connector, you must first provide a Slack bot token - corresponding to the Slack App set up in your workspace. For more - details on setting up the Danswer Slack App, see the{" "} - - docs - - . -

- - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - slack_bot_token: Yup.string().required( - "Please enter your Slack bot token" - ), - })} - initialValues={{ - slack_bot_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which channels do you want to make searchable? - - - {slackConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest messages from each workspace listed below every{" "} - 10 minutes. - -
- - connectorIndexingStatuses={slackConnectorIndexingStatuses} - liveCredential={slackCredential} - getCredential={(credential) => - credential.credential_json.slack_bot_token - } - specialColumns={[ - { - header: "Workspace", - key: "workspace", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config.workspace, - }, - { - header: "Channels", - key: "channels", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return connectorConfig.channels && - connectorConfig.channels.length > 0 - ? connectorConfig.channels.join(", ") - : ""; - }, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (slackCredential) { - await linkCredential(connectorId, slackCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - {slackCredential ? ( - -

Connect to a New Workspace

- - nameBuilder={(values) => - values.channels - ? `SlackConnector-${values.workspace}-${values.channels.join( - "_" - )}` - : `SlackConnector-${values.workspace}` - } - source="slack" - inputType="poll" - formBody={ - <> - - - } - formBodyBuilder={(values) => { - return ( - <> - - {TextArrayFieldBuilder({ - name: "channels", - label: "Channels:", - subtext: ` - Specify 0 or more channels to index. For example, specifying the channel - "support" will cause us to only index all content within the "#support" channel. - If no channels are specified, all channels in your workspace will be indexed.`, - })(values)} - - If enabled, we will treat the "channels" - specified above as regular expressions. A channel's - messages will be pulled in by the connector if the name - of the channel fully matches any of the specified - regular expressions. -
- For example, specifying .*-support.* as a - "channel" will cause the connector to include - any channels with "-support" in the name. -
- } - /> - - ); - }} - validationSchema={Yup.object().shape({ - workspace: Yup.string().required( - "Please enter the workspace to index" - ), - channels: Yup.array() - .of(Yup.string().required("Channel names must be strings")) - .required(), - channel_regex_enabled: Yup.boolean().required(), - })} - initialValues={{ - workspace: "", - channels: [], - channel_regex_enabled: false, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={slackCredential.id} - /> - - ) : ( - - Please provide your slack bot token in Step 1 first! Once done with - that, you can then specify which Slack channels you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Slack" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/teams/page.tsx b/web/src/app/admin/connectors/teams/page.tsx deleted file mode 100644 index 530d430abb6..00000000000 --- a/web/src/app/admin/connectors/teams/page.tsx +++ /dev/null @@ -1,275 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, TeamsIcon } from "@/components/icons/icons"; // Make sure you have a Document360 icon -import { errorHandlingFetcher } from "@/lib/fetcher"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - TeamsConfig, - TeamsCredentialJson, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; // Modify or create these types as required -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - TextFormField, - TextArrayFieldBuilder, -} from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; -import { ErrorCallout } from "@/components/ErrorCallout"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const teamsConnectorIndexingStatuses: ConnectorIndexingStatus< - TeamsConfig, - TeamsCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "teams" - ); - - const teamsCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.teams_client_id - ); - - return ( - <> - - The Teams connector allows you to index and search through your Teams - channels. Once setup, all messages from the channels contained in the - specified teams will be queryable within Danswer. - - - - Step 1: Provide Teams credentials - - {teamsCredential ? ( - <> -
- Existing Azure AD Client ID: - - {teamsCredential.credential_json.teams_client_id} - - -
- - ) : ( - <> - - As a first step, please provide Application (client) ID, Directory - (tenant) ID, and Client Secret. You can follow the guide{" "} - - here - {" "} - to create an Azure AD application and obtain these values. - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - teams_client_id: Yup.string().required( - "Please enter your Application (client) ID" - ), - teams_directory_id: Yup.string().required( - "Please enter your Directory (tenant) ID" - ), - teams_client_secret: Yup.string().required( - "Please enter your Client Secret" - ), - })} - initialValues={{ - teams_client_id: "", - teams_directory_id: "", - teams_client_secret: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Manage Teams Connector - - - {teamsConnectorIndexingStatuses.length > 0 && ( - <> - - The latest messages from the specified teams are fetched every 10 - minutes. - -
- - connectorIndexingStatuses={teamsConnectorIndexingStatuses} - liveCredential={teamsCredential} - getCredential={(credential) => - credential.credential_json.teams_directory_id - } - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (teamsCredential) { - await linkCredential(connectorId, teamsCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - specialColumns={[ - { - header: "Connectors", - key: "connectors", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.teams}`; - }, - }, - ]} - includeName - /> -
- - )} - - {teamsCredential ? ( - - - nameBuilder={(values) => - values.teams && values.teams.length > 0 - ? `Teams-${values.teams.join("-")}` - : "Teams" - } - ccPairNameBuilder={(values) => - values.teams && values.teams.length > 0 - ? `Teams-${values.teams.join("-")}` - : "Teams" - } - source="teams" - inputType="poll" - // formBody={<>} - formBodyBuilder={TextArrayFieldBuilder({ - name: "teams", - label: "Teams:", - subtext: - "Specify 0 or more Teams to index. " + - "For example, specifying the Team 'Support' for the 'danswerai' Org will cause " + - "us to only index messages sent in channels belonging to the 'Support' Team. " + - "If no Teams are specified, all Teams in your organization will be indexed.", - })} - validationSchema={Yup.object().shape({ - teams: Yup.array() - .of(Yup.string().required("Team names must be strings")) - .required(), - })} - initialValues={{ - teams: [], - }} - credentialId={teamsCredential.id} - refreshFreq={10 * 60} // 10 minutes - /> - - ) : ( - - Please provide all Azure info in Step 1 first! Once you're done - with that, you can then specify which teams you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Teams" /> - - -
- ); -} diff --git a/web/src/app/admin/connectors/web/page.tsx b/web/src/app/admin/connectors/web/page.tsx deleted file mode 100644 index 410d187920e..00000000000 --- a/web/src/app/admin/connectors/web/page.tsx +++ /dev/null @@ -1,184 +0,0 @@ -"use client"; - -import useSWR, { useSWRConfig } from "swr"; -import * as Yup from "yup"; - -import { LoadingAnimation } from "@/components/Loading"; -import { - GlobeIcon, - GearIcon, - ArrowSquareOutIcon, -} from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { - SelectorFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { ConnectorIndexingStatus, WebConfig } from "@/lib/types"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Title } from "@tremor/react"; - -const SCRAPE_TYPE_TO_PRETTY_NAME = { - recursive: "Recursive", - single: "Single Page", - sitemap: "Sitemap", -}; - -export default function Web() { - const { mutate } = useSWRConfig(); - - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const webIndexingStatuses: ConnectorIndexingStatus[] = - connectorIndexingStatuses?.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "web" - ) ?? []; - - return ( -
-
- -
- - } title="Web" /> - - - Step 1: Specify which websites to index - -

- We re-fetch the latest state of the website once a day. -

- - - nameBuilder={(values) => `WebConnector-${values.base_url}`} - ccPairNameBuilder={(values) => values.base_url} - // Since there is no "real" credential associated with a web connector - // we create a dummy one here so that we can associate the CC Pair with a - // user. This is needed since the user for a CC Pair is found via the credential - // associated with it. - shouldCreateEmptyCredentialForConnector={true} - source="web" - inputType="load_state" - formBody={ - <> - -
- -
- - } - validationSchema={Yup.object().shape({ - base_url: Yup.string().required( - "Please enter the website URL to scrape e.g. https://docs.danswer.dev/" - ), - web_connector_type: Yup.string() - .oneOf(["recursive", "single", "sitemap"]) - .optional(), - })} - initialValues={{ - base_url: "", - web_connector_type: undefined, - }} - refreshFreq={60 * 60 * 24} // 1 day - pruneFreq={0} // Don't prune - /> -
- - - Already Indexed Websites - - {isConnectorIndexingStatusesLoading ? ( - - ) : connectorIndexingStatusesError || !connectorIndexingStatuses ? ( -
Error loading indexing history
- ) : webIndexingStatuses.length > 0 ? ( - - connectorIndexingStatuses={webIndexingStatuses} - specialColumns={[ - { - header: "Base URL", - key: "base_url", - getValue: ( - ccPairStatus: ConnectorIndexingStatus - ) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return ( - - ); - }, - }, - { - header: "Scrape Method", - key: "web_connector_type", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return connectorConfig.web_connector_type - ? SCRAPE_TYPE_TO_PRETTY_NAME[ - connectorConfig.web_connector_type - ] - : "Recursive"; - }, - }, - ]} - onUpdate={() => mutate("/api/manage/admin/connector/indexing-status")} - /> - ) : ( -

No indexed websites found

- )} -
- ); -} diff --git a/web/src/app/admin/connectors/wikipedia/page.tsx b/web/src/app/admin/connectors/wikipedia/page.tsx deleted file mode 100644 index f410b209a21..00000000000 --- a/web/src/app/admin/connectors/wikipedia/page.tsx +++ /dev/null @@ -1,214 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { WikipediaIcon, TrashIcon } from "@/components/icons/icons"; -import { - TextArrayField, - TextArrayFieldBuilder, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - WikipediaCredentialJson, - WikipediaConfig, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const wikipediaConnectorIndexingStatuses: ConnectorIndexingStatus< - WikipediaConfig, - WikipediaCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "wikipedia" - ); - const wikipediaCredential: Credential | undefined = - credentialsData.find((credential) => true); - - return ( - <> - {popup} - {wikipediaConnectorIndexingStatuses.length > 0 && ( - <> - - Wikipedia indexing status - - - The latest page, chapter, book and shelf changes are fetched every - 10 minutes. - -
- - connectorIndexingStatuses={wikipediaConnectorIndexingStatuses} - liveCredential={wikipediaCredential} - getCredential={(credential) => { - return
; - }} - onCredentialLink={async (connectorId) => { - if (wikipediaCredential) { - await linkCredential(connectorId, wikipediaCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {wikipediaCredential && ( - <> - -

Create Connection

- - Press connect below to start the connection to your Wikipedia - instance. - - - nameBuilder={(values) => - `WikipediaConnector-${values.connector_name}` - } - ccPairNameBuilder={(values) => - `WikipediaConnector-${values.connector_name}` - } - source="wikipedia" - inputType="poll" - formBodyBuilder={(values) => ( -
- - - {TextArrayFieldBuilder({ - name: "pages", - label: "Pages to index:", - subtext: - "Specify 0 or more names of pages to index. Only specify the name of the page, not its url.", - })(values)} - {TextArrayFieldBuilder({ - name: "categories", - label: "Categories to index:", - subtext: - "Specify 0 or more names of categories to index. These are pages" + - " with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only" + - " specify the name of the category, not its url.", - })(values)} - -
- )} - validationSchema={Yup.object().shape({ - connector_name: Yup.string().required( - "Please enter a name for your Wikipedia connector." - ), - language_code: Yup.string().default("en"), - categories: Yup.array().of( - Yup.string().required( - "Please enter categories to index from your Wikipedia site" - ) - ), - pages: Yup.array().of( - Yup.string().required( - "Please enter pages to index from your Wikipedia site" - ) - ), - recurse_depth: Yup.number().required( - "Please enter the recursion depth for your Wikipedia site." - ), - })} - initialValues={{ - connector_name: "", - language_code: "en", - categories: [], - pages: [], - recurse_depth: 0, - }} - refreshFreq={10 * 60} // 10 minutes - credentialId={wikipediaCredential.id} - /> -
- - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Wikipedia" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/zendesk/page.tsx b/web/src/app/admin/connectors/zendesk/page.tsx deleted file mode 100644 index dac1fe76e49..00000000000 --- a/web/src/app/admin/connectors/zendesk/page.tsx +++ /dev/null @@ -1,254 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { TrashIcon, ZendeskIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { - ZendeskCredentialJson, - ZendeskConfig, - ConnectorIndexingStatus, - Credential, -} from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { LoadingAnimation } from "@/components/Loading"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { usePublicCredentials } from "@/lib/hooks"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { Card, Divider, Text, Title } from "@tremor/react"; - -const Main = () => { - const { popup, setPopup } = usePopup(); - - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const zendeskConnectorIndexingStatuses: ConnectorIndexingStatus< - ZendeskConfig, - ZendeskCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "zendesk" - ); - const zendeskCredential: Credential | undefined = - credentialsData.find( - (credential) => credential.credential_json?.zendesk_email - ); - - return ( - <> - {popup} - - Provide your API details - - - {zendeskCredential ? ( - <> -
-              Existing API Token:
-              {zendeskCredential.credential_json?.zendesk_token}
- - ) : ( - <> - - To get started you'll need API token details for your Zendesk - instance. You can generate this by access the Admin Center of your - instance (e.g. https://<subdomain>.zendesk.com/admin/). - Proceed to the "Apps and Integrations" section and - "Zendesk API" page. Add a new API token and provide it - with a name. You will also need to provide the e-mail address of a - user that the system will impersonate. This is of little consequence - as we are only performing read actions. - - - - formBody={ - <> - - - - - } - validationSchema={Yup.object().shape({ - zendesk_subdomain: Yup.string().required( - "Please enter the subdomain for your Zendesk instance" - ), - zendesk_email: Yup.string().required( - "Please enter your user email to user with the token" - ), - zendesk_token: Yup.string().required( - "Please enter your Zendesk API token" - ), - })} - initialValues={{ - zendesk_subdomain: "", - zendesk_email: "", - zendesk_token: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> - - - )} - - {zendeskConnectorIndexingStatuses.length > 0 && ( - <> - - Zendesk indexing status - - - The latest article changes are fetched every 10 minutes. - -
- - connectorIndexingStatuses={zendeskConnectorIndexingStatuses} - liveCredential={zendeskCredential} - getCredential={(credential) => { - return ( -
-                      {credential.credential_json.zendesk_token}
- ); - }} - onCredentialLink={async (connectorId) => { - if (zendeskCredential) { - await linkCredential(connectorId, zendeskCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - /> -
- - )} - - {zendeskCredential && zendeskConnectorIndexingStatuses.length === 0 && ( - <> - -

Create Connection

-

- Press connect below to start the connection to your Zendesk - instance. -

- - nameBuilder={(values) => `ZendeskConnector`} - ccPairNameBuilder={(values) => `ZendeskConnector`} - source="zendesk" - inputType="poll" - formBody={<>} - validationSchema={Yup.object().shape({})} - initialValues={{}} - refreshFreq={10 * 60} // 10 minutes - credentialId={zendeskCredential.id} - /> -
- - )} - - {!zendeskCredential && ( - <> - - Please provide your API details in Step 1 first! Once done with - that, you'll be able to start the connection then see indexing - status. - - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Zendesk" /> - -
-
- ); -} diff --git a/web/src/app/admin/connectors/zulip/page.tsx b/web/src/app/admin/connectors/zulip/page.tsx deleted file mode 100644 index 66a35df30a5..00000000000 --- a/web/src/app/admin/connectors/zulip/page.tsx +++ /dev/null @@ -1,248 +0,0 @@ -"use client"; - -import * as Yup from "yup"; -import { ZulipIcon, TrashIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { useSWRConfig } from "swr"; -import { LoadingAnimation } from "@/components/Loading"; -import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { - ZulipConfig, - Credential, - ZulipCredentialJson, - ConnectorIndexingStatus, -} from "@/lib/types"; -import { adminDeleteCredential, linkCredential } from "@/lib/credential"; -import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; -import { TextFormField } from "@/components/admin/connectors/Field"; -import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; -import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; -import { usePublicCredentials } from "@/lib/hooks"; -import { Card, Divider, Text, Title } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -const MainSection = () => { - const { mutate } = useSWRConfig(); - const { - data: connectorIndexingStatuses, - isLoading: isConnectorIndexingStatusesLoading, - error: connectorIndexingStatusesError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher - ); - - const { - data: credentialsData, - isLoading: isCredentialsLoading, - error: credentialsError, - refreshCredentials, - } = usePublicCredentials(); - - if ( - (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || - (!credentialsData && isCredentialsLoading) - ) { - return ; - } - - if (connectorIndexingStatusesError || !connectorIndexingStatuses) { - return ( - - ); - } - - if (credentialsError || !credentialsData) { - return ( - - ); - } - - const zulipConnectorIndexingStatuses: ConnectorIndexingStatus< - ZulipConfig, - ZulipCredentialJson - >[] = connectorIndexingStatuses.filter( - (connectorIndexingStatus) => - connectorIndexingStatus.connector.source === "zulip" - ); - const zulipCredential: Credential | undefined = - credentialsData.filter( - (credential) => credential.credential_json?.zuliprc_content - )[0]; - - return ( - <> - - Step 1: Provide Credentials - - {zulipCredential ? ( - <> -
- Existing zuliprc file content: - - {zulipCredential.credential_json.zuliprc_content} - {" "} - -
- - ) : ( - <> - - To use the Zulip connector, you must first provide content of the - zuliprc config file. For more details on setting up the Danswer - Zulip connector, see the{" "} - - docs - - . - - - - formBody={ - <> - - - } - validationSchema={Yup.object().shape({ - zuliprc_content: Yup.string().required( - "Please enter content of the zuliprc file" - ), - })} - initialValues={{ - zuliprc_content: "", - }} - onSubmit={(isSuccess) => { - if (isSuccess) { - refreshCredentials(); - } - }} - /> - - - )} - - - Step 2: Which workspaces do you want to make searchable? - - - {zulipCredential ? ( - <> - {zulipConnectorIndexingStatuses.length > 0 && ( - <> - - We pull the latest messages from each workspace listed below - every 10 minutes. - -
- - credential.credential_json.zuliprc_content - } - specialColumns={[ - { - header: "Realm name", - key: "realm_name", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .realm_name, - }, - { - header: "Realm url", - key: "realm_url", - getValue: (ccPairStatus) => - ccPairStatus.connector.connector_specific_config - .realm_url, - }, - ]} - onUpdate={() => - mutate("/api/manage/admin/connector/indexing-status") - } - onCredentialLink={async (connectorId) => { - if (zulipCredential) { - await linkCredential(connectorId, zulipCredential.id); - mutate("/api/manage/admin/connector/indexing-status"); - } - }} - /> -
- - - )} - - -

Connect to a New Realm

- - nameBuilder={(values) => `ZulipConnector-${values.realm_name}`} - ccPairNameBuilder={(values) => values.realm_name} - source="zulip" - inputType="poll" - credentialId={zulipCredential.id} - formBody={ - <> - - - - } - validationSchema={Yup.object().shape({ - realm_name: Yup.string().required( - "Please enter the realm name" - ), - realm_url: Yup.string().required("Please enter the realm url"), - })} - initialValues={{ - realm_name: "", - realm_url: "", - }} - refreshFreq={10 * 60} // 10 minutes - /> -
- - ) : ( - - Please provide your Zulip credentials in Step 1 first! Once done with - that, you can then specify which Zulip realms you want to make - searchable. - - )} - - ); -}; - -export default function Page() { - return ( -
-
- -
- - } title="Zulip" /> - - -
- ); -} diff --git a/web/src/app/admin/documents/explorer/Explorer.tsx b/web/src/app/admin/documents/explorer/Explorer.tsx index 315df323e8b..a773c222484 100644 --- a/web/src/app/admin/documents/explorer/Explorer.tsx +++ b/web/src/app/admin/documents/explorer/Explorer.tsx @@ -15,8 +15,9 @@ import { HorizontalFilters } from "@/components/search/filtering/Filters"; import { useFilters } from "@/lib/hooks"; import { buildFilters } from "@/lib/search/utils"; import { DocumentUpdatedAtBadge } from "@/components/search/DocumentUpdatedAtBadge"; -import { Connector, DocumentSet } from "@/lib/types"; +import { DocumentSet } from "@/lib/types"; import { SourceIcon } from "@/components/SourceIcon"; +import { Connector } from "@/lib/connectors/connectors"; const DocumentDisplay = ({ document, @@ -173,7 +174,11 @@ export function Explorer({ setQuery(event.target.value); }} onKeyDown={(event) => { - if (event.key === "Enter" && !event.shiftKey) { + if ( + event.key === "Enter" && + !event.shiftKey && + !(event.nativeEvent as any).isComposing + ) { onSearch(query); event.preventDefault(); } diff --git a/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx b/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx index 89a73bf3c5d..814af4e2863 100644 --- a/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx +++ b/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx @@ -3,16 +3,24 @@ import { ArrayHelpers, FieldArray, Form, Formik } from "formik"; import * as Yup from "yup"; import { PopupSpec } from "@/components/admin/connectors/Popup"; -import { createDocumentSet, updateDocumentSet } from "./lib"; -import { ConnectorIndexingStatus, DocumentSet, UserGroup } from "@/lib/types"; import { - BooleanFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; + createDocumentSet, + updateDocumentSet, + DocumentSetCreationRequest, +} from "./lib"; +import { + ConnectorIndexingStatus, + DocumentSet, + UserGroup, + UserRole, +} from "@/lib/types"; +import { TextFormField } from "@/components/admin/connectors/Field"; import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; -import { Button, Divider, Text } from "@tremor/react"; -import { FiUsers } from "react-icons/fi"; +import { Button, Divider } from "@tremor/react"; import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector"; +import React, { useEffect, useState } from "react"; +import { useUser } from "@/components/user/UserProvider"; interface SetCreationPopupProps { ccPairs: ConnectorIndexingStatus[]; @@ -30,27 +38,29 @@ export const DocumentSetCreationForm = ({ existingDocumentSet, }: SetCreationPopupProps) => { const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); - const isUpdate = existingDocumentSet !== undefined; + const [localCcPairs, setLocalCcPairs] = useState(ccPairs); + const { user } = useUser(); + + useEffect(() => { + if (existingDocumentSet?.is_public) { + return; + } + }, [existingDocumentSet?.is_public]); return (
- initialValues={{ - name: existingDocumentSet ? existingDocumentSet.name : "", - description: existingDocumentSet - ? existingDocumentSet.description - : "", - cc_pair_ids: existingDocumentSet - ? existingDocumentSet.cc_pair_descriptors.map( - (ccPairDescriptor) => { - return ccPairDescriptor.id; - } - ) - : ([] as number[]), - is_public: existingDocumentSet ? existingDocumentSet.is_public : true, - users: existingDocumentSet ? existingDocumentSet.users : [], - groups: existingDocumentSet ? existingDocumentSet.groups : [], + name: existingDocumentSet?.name ?? "", + description: existingDocumentSet?.description ?? "", + cc_pair_ids: + existingDocumentSet?.cc_pair_descriptors.map( + (ccPairDescriptor) => ccPairDescriptor.id + ) ?? [], + is_public: existingDocumentSet?.is_public ?? true, + users: existingDocumentSet?.users ?? [], + groups: existingDocumentSet?.groups ?? [], }} validationSchema={Yup.object().shape({ name: Yup.string().required("Please enter a name for the set"), @@ -74,6 +84,7 @@ export const DocumentSetCreationForm = ({ response = await updateDocumentSet({ id: existingDocumentSet.id, ...processedValues, + users: processedValues.users, }); } else { response = await createDocumentSet(processedValues); @@ -98,129 +109,90 @@ export const DocumentSetCreationForm = ({ } }} > - {({ isSubmitting, values }) => ( -
- - + {(props) => { + return ( + + + + {isPaidEnterpriseFeaturesEnabled && + userGroups && + userGroups.length > 0 && ( + + )} - + -

- Pick your connectors: -

-

- All documents indexed by the selected connectors will be a part of - this document set. -

- ( -
- {ccPairs.map((ccPair) => { - const ind = values.cc_pair_ids.indexOf(ccPair.cc_pair_id); - let isSelected = ind !== -1; - return ( -
{ - if (isSelected) { - arrayHelpers.remove(ind); - } else { - arrayHelpers.push(ccPair.cc_pair_id); - } - }} - > -
- -
-
- ); - })} -
- )} - /> + {user?.role === UserRole.CURATOR ? ( + <> +
+

+ These are the connectors available to{" "} + {userGroups && userGroups.length > 1 + ? "the selected group" + : "the group you curate"} + : +

- {isPaidEnterpriseFeaturesEnabled && - userGroups && - userGroups.length > 0 && ( -
- +

+ All documents indexed by these selected connectors will be + a part of this document set. +

+ { + // Filter visible cc pairs + const visibleCcPairs = localCcPairs.filter( + (ccPair) => + ccPair.public_doc || + (ccPair.groups.length > 0 && + props.values.groups.every((group) => + ccPair.groups.includes(group) + )) + ); - - If the document set is public, then it will be visible - to all users. If it is not public, then only - users in the specified groups will be able to see it. - - } - /> + // Deselect filtered out cc pairs + const visibleCcPairIds = visibleCcPairs.map( + (ccPair) => ccPair.cc_pair_id + ); + props.values.cc_pair_ids = + props.values.cc_pair_ids.filter((id) => + visibleCcPairIds.includes(id) + ); - -

- Groups with Access -

- {!values.is_public ? ( - <> - - If any groups are specified, then this Document Set will - only be visible to the specified groups. If no groups - are specified, then the Document Set will be visible to - all users. - - ( -
- {userGroups.map((userGroup) => { - const ind = values.groups.indexOf(userGroup.id); + return ( +
+ {visibleCcPairs.map((ccPair) => { + const ind = props.values.cc_pair_ids.indexOf( + ccPair.cc_pair_id + ); let isSelected = ind !== -1; return (
-
- {" "} - {userGroup.name} +
+
); })}
- )} - /> - - ) : ( - - This Document Set is public, so this does not apply. If - you want to control which user groups see this Document - Set, mark it as non-public! - - )} + ); + }} + /> +
+ +
+ { + // Filter non-visible cc pairs + const nonVisibleCcPairs = localCcPairs.filter( + (ccPair) => + !ccPair.public_doc && + (ccPair.groups.length === 0 || + !props.values.groups.every((group) => + ccPair.groups.includes(group) + )) + ); + + return nonVisibleCcPairs.length > 0 ? ( + <> + +

+ These connectors are not available to the{" "} + {userGroups && userGroups.length > 1 + ? `group${props.values.groups.length > 1 ? "s" : ""} you have selected` + : "group you curate"} + : +

+

+ Only connectors that are directly assigned to the + group you are trying to add the document set to + will be available. +

+
+ {nonVisibleCcPairs.map((ccPair) => ( +
+
+ +
+
+ ))} +
+ + ) : null; + }} + /> +
+ + ) : ( +
+

+ Pick your connectors: +

+

+ All documents indexed by the selected connectors will be a + part of this document set. +

+ ( +
+ {ccPairs.map((ccPair) => { + const ind = props.values.cc_pair_ids.indexOf( + ccPair.cc_pair_id + ); + let isSelected = ind !== -1; + return ( +
{ + if (isSelected) { + arrayHelpers.remove(ind); + } else { + arrayHelpers.push(ccPair.cc_pair_id); + } + }} + > +
+ +
+
+ ); + })} +
+ )} + />
)} -
- -
- - )} + +
+ +
+ + ); + }}
); diff --git a/web/src/app/admin/documents/sets/hooks.tsx b/web/src/app/admin/documents/sets/hooks.tsx index fe7969acec9..1362cfda23e 100644 --- a/web/src/app/admin/documents/sets/hooks.tsx +++ b/web/src/app/admin/documents/sets/hooks.tsx @@ -2,20 +2,20 @@ import { errorHandlingFetcher } from "@/lib/fetcher"; import { DocumentSet } from "@/lib/types"; import useSWR, { mutate } from "swr"; -const DOCUMENT_SETS_URL = "/api/manage/admin/document-set"; +const DOCUMENT_SETS_URL = "/api/manage/document-set"; +const GET_EDITABLE_DOCUMENT_SETS_URL = + "/api/manage/document-set?get_editable=true"; export function refreshDocumentSets() { mutate(DOCUMENT_SETS_URL); } -export function useDocumentSets() { - const swrResponse = useSWR( - DOCUMENT_SETS_URL, - errorHandlingFetcher, - { - refreshInterval: 5000, // 5 seconds - } - ); +export function useDocumentSets(getEditable: boolean = false) { + const url = getEditable ? GET_EDITABLE_DOCUMENT_SETS_URL : DOCUMENT_SETS_URL; + + const swrResponse = useSWR(url, errorHandlingFetcher, { + refreshInterval: 5000, // 5 seconds + }); return { ...swrResponse, diff --git a/web/src/app/admin/documents/sets/lib.ts b/web/src/app/admin/documents/sets/lib.ts index 2184504cc31..807016ee001 100644 --- a/web/src/app/admin/documents/sets/lib.ts +++ b/web/src/app/admin/documents/sets/lib.ts @@ -1,4 +1,4 @@ -interface DocumentSetCreationRequest { +export interface DocumentSetCreationRequest { name: string; description: string; cc_pair_ids: number[]; diff --git a/web/src/app/admin/documents/sets/page.tsx b/web/src/app/admin/documents/sets/page.tsx index d5098322ac5..718b81ab0b6 100644 --- a/web/src/app/admin/documents/sets/page.tsx +++ b/web/src/app/admin/documents/sets/page.tsx @@ -16,7 +16,9 @@ import { } from "@tremor/react"; import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types"; -import { useState } from "react"; +import { useState, useEffect } from "react"; +import { getCurrentUser } from "@/lib/user"; +import { User, UserRole } from "@/lib/types"; import { useDocumentSets } from "./hooks"; import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; import { deleteDocumentSet } from "./lib"; @@ -28,6 +30,8 @@ import { FiCheckCircle, FiClock, FiEdit2, + FiLock, + FiUnlock, } from "react-icons/fi"; import { DeleteButton } from "@/components/DeleteButton"; import Link from "next/link"; @@ -35,10 +39,25 @@ import { useRouter } from "next/navigation"; const numToDisplay = 50; -const EditRow = ({ documentSet }: { documentSet: DocumentSet }) => { +const EditRow = ({ + documentSet, + isEditable, +}: { + documentSet: DocumentSet; + isEditable: boolean; +}) => { const router = useRouter(); const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false); + + if (!isEditable) { + return ( +
+ {documentSet.name} +
+ ); + } + return (
{isSyncingTooltipOpen && ( @@ -79,12 +98,16 @@ interface DocumentFeedbackTableProps { documentSets: DocumentSet[]; ccPairs: ConnectorIndexingStatus[]; refresh: () => void; + refreshEditable: () => void; setPopup: (popupSpec: PopupSpec | null) => void; + editableDocumentSets: DocumentSet[]; } const DocumentSetTable = ({ documentSets, + editableDocumentSets, refresh, + refreshEditable, setPopup, }: DocumentFeedbackTableProps) => { const [page, setPage] = useState(1); @@ -100,6 +123,13 @@ const DocumentSetTable = ({ } }); + const sortedDocumentSets = [ + ...editableDocumentSets, + ...documentSets.filter( + (ds) => !editableDocumentSets.some((eds) => eds.id === ds.id) + ), + ]; + return (
Existing Document Sets @@ -109,18 +139,25 @@ const DocumentSetTable = ({ Name Connectors Status + Public Delete - {documentSets + {sortedDocumentSets .slice((page - 1) * numToDisplay, page * numToDisplay) .map((documentSet) => { + const isEditable = editableDocumentSets.some( + (eds) => eds.id === documentSet.id + ); return (
- +
@@ -165,26 +202,50 @@ const DocumentSetTable = ({ )} - { - const response = await deleteDocumentSet( - documentSet.id - ); - if (response.ok) { - setPopup({ - message: `Document set "${documentSet.name}" scheduled for deletion`, - type: "success", - }); - } else { - const errorMsg = (await response.json()).detail; - setPopup({ - message: `Failed to schedule document set for deletion - ${errorMsg}`, - type: "error", - }); - } - refresh(); - }} - /> + {documentSet.is_public ? ( + + Public + + ) : ( + + Private + + )} + + + {isEditable ? ( + { + const response = await deleteDocumentSet( + documentSet.id + ); + if (response.ok) { + setPopup({ + message: `Document set "${documentSet.name}" scheduled for deletion`, + type: "success", + }); + } else { + const errorMsg = (await response.json()).detail; + setPopup({ + message: `Failed to schedule document set for deletion - ${errorMsg}`, + type: "error", + }); + } + refresh(); + refreshEditable(); + }} + /> + ) : ( + "-" + )}
); @@ -195,7 +256,7 @@ const DocumentSetTable = ({
setPage(newPage)} /> @@ -213,6 +274,12 @@ const Main = () => { error: documentSetsError, refreshDocumentSets, } = useDocumentSets(); + const { + data: editableDocumentSets, + isLoading: isEditableDocumentSetsLoading, + error: editableDocumentSetsError, + refreshDocumentSets: refreshEditableDocumentSets, + } = useDocumentSets(true); const { data: ccPairs, @@ -220,7 +287,11 @@ const Main = () => { error: ccPairsError, } = useConnectorCredentialIndexingStatus(); - if (isDocumentSetsLoading || isCCPairsLoading) { + if ( + isDocumentSetsLoading || + isCCPairsLoading || + isEditableDocumentSetsLoading + ) { return ; } @@ -228,6 +299,10 @@ const Main = () => { return
Error: {documentSetsError}
; } + if (editableDocumentSetsError || !editableDocumentSets) { + return
Error: {editableDocumentSetsError}
; + } + if (ccPairsError || !ccPairs) { return
Error: {ccPairsError}
; } @@ -258,8 +333,10 @@ const Main = () => { diff --git a/web/src/app/admin/embeddings/EmbeddingModelSelectionForm.tsx b/web/src/app/admin/embeddings/EmbeddingModelSelectionForm.tsx new file mode 100644 index 00000000000..1b9fffda428 --- /dev/null +++ b/web/src/app/admin/embeddings/EmbeddingModelSelectionForm.tsx @@ -0,0 +1,306 @@ +"use client"; + +import { errorHandlingFetcher } from "@/lib/fetcher"; +import useSWR, { mutate } from "swr"; +import { Dispatch, SetStateAction, useState } from "react"; +import { + CloudEmbeddingProvider, + CloudEmbeddingModel, + AVAILABLE_CLOUD_PROVIDERS, + AVAILABLE_MODELS, + INVALID_OLD_MODEL, + HostedEmbeddingModel, + EmbeddingModelDescriptor, + EmbeddingProvider, +} from "../../../components/embedding/interfaces"; +import { Connector } from "@/lib/connectors/connectors"; +import OpenEmbeddingPage from "./pages/OpenEmbeddingPage"; +import CloudEmbeddingPage from "./pages/CloudEmbeddingPage"; +import { ProviderCreationModal } from "./modals/ProviderCreationModal"; + +import { DeleteCredentialsModal } from "./modals/DeleteCredentialsModal"; +import { SelectModelModal } from "./modals/SelectModelModal"; +import { ChangeCredentialsModal } from "./modals/ChangeCredentialsModal"; +import { ModelSelectionConfirmationModal } from "./modals/ModelSelectionModal"; +import { AlreadyPickedModal } from "./modals/AlreadyPickedModal"; +import { ModelOption } from "../../../components/embedding/ModelSelector"; +import { EMBEDDING_PROVIDERS_ADMIN_URL } from "../configuration/llm/constants"; + +export interface EmbeddingDetails { + api_key: string; + custom_config: any; + provider_type: EmbeddingProvider; +} + +export function EmbeddingModelSelection({ + selectedProvider, + currentEmbeddingModel, + updateSelectedProvider, + modelTab, + setModelTab, +}: { + modelTab: "open" | "cloud" | null; + setModelTab: Dispatch>; + currentEmbeddingModel: CloudEmbeddingModel | HostedEmbeddingModel; + selectedProvider: CloudEmbeddingModel | HostedEmbeddingModel; + updateSelectedProvider: ( + model: CloudEmbeddingModel | HostedEmbeddingModel + ) => void; +}) { + // Cloud Provider based modals + const [showTentativeProvider, setShowTentativeProvider] = + useState(null); + + const [showUnconfiguredProvider, setShowUnconfiguredProvider] = + useState(null); + const [changeCredentialsProvider, setChangeCredentialsProvider] = + useState(null); + + // Cloud Model based modals + const [alreadySelectedModel, setAlreadySelectedModel] = + useState(null); + const [showTentativeModel, setShowTentativeModel] = + useState(null); + + const [showModelInQueue, setShowModelInQueue] = + useState(null); + + // Open Model based modals + const [showTentativeOpenProvider, setShowTentativeOpenProvider] = + useState(null); + + // Enabled / unenabled providers + const [newEnabledProviders, setNewEnabledProviders] = useState([]); + const [newUnenabledProviders, setNewUnenabledProviders] = useState( + [] + ); + + const [showDeleteCredentialsModal, setShowDeleteCredentialsModal] = + useState(false); + const [showAddConnectorPopup, setShowAddConnectorPopup] = + useState(false); + + const { data: embeddingProviderDetails } = useSWR( + EMBEDDING_PROVIDERS_ADMIN_URL, + errorHandlingFetcher + ); + + const { data: connectors } = useSWR[]>( + "/api/manage/connector", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const onConfirmSelection = async (model: EmbeddingModelDescriptor) => { + const response = await fetch( + "/api/search-settings/set-new-search-settings", + { + method: "POST", + body: 
JSON.stringify({ ...model, index_name: null }), + headers: { + "Content-Type": "application/json", + }, + } + ); + if (response.ok) { + setShowTentativeModel(null); + mutate("/api/search-settings/get-secondary-search-settings"); + if (!connectors || !connectors.length) { + setShowAddConnectorPopup(true); + } + } else { + alert(`Failed to update embedding model - ${await response.text()}`); + } + }; + + const onSelectOpenSource = async (model: HostedEmbeddingModel) => { + if (selectedProvider?.model_name === INVALID_OLD_MODEL) { + await onConfirmSelection(model); + } else { + setShowTentativeOpenProvider(model); + } + }; + + const clientsideAddProvider = (provider: CloudEmbeddingProvider) => { + const providerType = provider.provider_type; + setNewEnabledProviders((newEnabledProviders) => [ + ...newEnabledProviders, + providerType, + ]); + setNewUnenabledProviders((newUnenabledProviders) => + newUnenabledProviders.filter( + (givenProviderType) => givenProviderType != providerType + ) + ); + }; + + const clientsideRemoveProvider = (provider: CloudEmbeddingProvider) => { + const providerType = provider.provider_type; + setNewEnabledProviders((newEnabledProviders) => + newEnabledProviders.filter( + (givenProviderType) => givenProviderType != providerType + ) + ); + setNewUnenabledProviders((newUnenabledProviders) => [ + ...newUnenabledProviders, + providerType, + ]); + }; + + return ( +
+ {alreadySelectedModel && ( + setAlreadySelectedModel(null)} + /> + )} + + {showTentativeOpenProvider && ( + + model.model_name === showTentativeOpenProvider.model_name + ) === undefined + } + onConfirm={() => { + updateSelectedProvider(showTentativeOpenProvider); + setShowTentativeOpenProvider(null); + }} + onCancel={() => setShowTentativeOpenProvider(null)} + /> + )} + + {showTentativeProvider && ( + { + setShowTentativeProvider(showUnconfiguredProvider); + clientsideAddProvider(showTentativeProvider); + if (showModelInQueue) { + setShowTentativeModel(showModelInQueue); + } + }} + onCancel={() => { + setShowModelInQueue(null); + setShowTentativeProvider(null); + }} + /> + )} + {changeCredentialsProvider && ( + { + clientsideRemoveProvider(changeCredentialsProvider); + setChangeCredentialsProvider(null); + }} + provider={changeCredentialsProvider} + onConfirm={() => setChangeCredentialsProvider(null)} + onCancel={() => setChangeCredentialsProvider(null)} + /> + )} + + {showTentativeModel && ( + { + setShowModelInQueue(null); + updateSelectedProvider(showTentativeModel); + setShowTentativeModel(null); + }} + onCancel={() => { + setShowModelInQueue(null); + setShowTentativeModel(null); + }} + /> + )} + + {showDeleteCredentialsModal && ( + { + setShowDeleteCredentialsModal(false); + }} + onCancel={() => setShowDeleteCredentialsModal(false)} + /> + )} + +

+ Select from cloud or self-hosted models, or continue with your current + embedding model. +

+
+ +
+ +
+
+ +
+
+ + {modelTab == "open" && ( + + )} + + {modelTab == "cloud" && ( + + )} + + {!modelTab && ( + <> + + + )} +
+ ); +} diff --git a/web/src/app/admin/embeddings/RerankingFormPage.tsx b/web/src/app/admin/embeddings/RerankingFormPage.tsx new file mode 100644 index 00000000000..81f24e7589a --- /dev/null +++ b/web/src/app/admin/embeddings/RerankingFormPage.tsx @@ -0,0 +1,242 @@ +import React, { Dispatch, forwardRef, SetStateAction, useState } from "react"; +import { Formik, Form, FormikProps } from "formik"; +import * as Yup from "yup"; +import { + RerankerProvider, + RerankingDetails, + rerankingModels, +} from "./interfaces"; +import { FiExternalLink } from "react-icons/fi"; +import { CohereIcon, MixedBreadIcon } from "@/components/icons/icons"; +import { Modal } from "@/components/Modal"; +import { Button } from "@tremor/react"; +import { TextFormField } from "@/components/admin/connectors/Field"; + +interface RerankingDetailsFormProps { + setRerankingDetails: Dispatch>; + currentRerankingDetails: RerankingDetails; + originalRerankingDetails: RerankingDetails; + modelTab: "open" | "cloud" | null; + setModelTab: Dispatch>; +} + +const RerankingDetailsForm = forwardRef< + FormikProps, + RerankingDetailsFormProps +>( + ( + { + setRerankingDetails, + originalRerankingDetails, + currentRerankingDetails, + modelTab, + setModelTab, + }, + ref + ) => { + const [isApiKeyModalOpen, setIsApiKeyModalOpen] = useState(false); + + return ( +
+

+ Post-processing +

+
+ {originalRerankingDetails.rerank_model_name && ( + + )} +
+ +
+ +
+ +
+
+ + () + .nullable() + .oneOf(Object.values(RerankerProvider)) + .optional(), + rerank_api_key: Yup.string().nullable(), + num_rerank: Yup.number().min(1, "Must be at least 1"), + })} + onSubmit={async (_, { setSubmitting }) => { + setSubmitting(false); + }} + enableReinitialize={true} + > + {({ values, setFieldValue }) => ( +
+
+ {(modelTab + ? rerankingModels.filter( + (model) => model.cloud == (modelTab == "cloud") + ) + : rerankingModels.filter( + (modelCard) => + modelCard.modelName == + originalRerankingDetails.rerank_model_name + ) + ).map((card) => { + const isSelected = + values.rerank_provider_type === card.rerank_provider_type && + values.rerank_model_name === card.modelName; + return ( +
{ + if (card.rerank_provider_type) { + setIsApiKeyModalOpen(true); + } + setRerankingDetails({ + ...values, + rerank_provider_type: card.rerank_provider_type!, + rerank_model_name: card.modelName, + }); + setFieldValue( + "rerank_provider_type", + card.rerank_provider_type + ); + setFieldValue("rerank_model_name", card.modelName); + }} + > +
+
+ {card.rerank_provider_type === + RerankerProvider.COHERE ? ( + + ) : ( + + )} +

+ {card.displayName} +

+
+ {card.link && ( + e.stopPropagation()} + className="text-blue-500 hover:text-blue-700 transition-colors duration-200" + > + + + )} +
+

+ {card.description} +

+
+ {card.cloud ? "Cloud-based" : "Self-hosted"} +
+
+ ); + })} +
+ + {isApiKeyModalOpen && ( + { + Object.keys(originalRerankingDetails).forEach((key) => { + setFieldValue( + key, + originalRerankingDetails[key as keyof RerankingDetails] + ); + }); + + setIsApiKeyModalOpen(false); + }} + width="w-[800px]" + title="API Key Configuration" + > +
+ ) => { + const value = e.target.value; + setRerankingDetails({ ...values, rerank_api_key: value }); + setFieldValue("rerank_api_key", value); + }} + type="password" + label="Cohere API Key" + name="rerank_api_key" + /> +
+ + +
+
+
+ )} +
+ )} +
+
+ ); + } +); + +RerankingDetailsForm.displayName = "RerankingDetailsForm"; +export default RerankingDetailsForm; diff --git a/web/src/app/admin/embeddings/interfaces.ts b/web/src/app/admin/embeddings/interfaces.ts new file mode 100644 index 00000000000..c3dec13e6cc --- /dev/null +++ b/web/src/app/admin/embeddings/interfaces.ts @@ -0,0 +1,89 @@ +import { EmbeddingProvider } from "@/components/embedding/interfaces"; +import { NonNullChain } from "typescript"; + +export interface RerankingDetails { + rerank_model_name: string | null; + rerank_provider_type: RerankerProvider | null; + rerank_api_key: string | null; + num_rerank: number; +} + +export enum RerankerProvider { + COHERE = "cohere", +} +export interface AdvancedSearchConfiguration { + model_name: string; + model_dim: number; + normalize: boolean; + query_prefix: string; + passage_prefix: string; + index_name: string | null; + multipass_indexing: boolean; + multilingual_expansion: string[]; + disable_rerank_for_streaming: boolean; +} + +export interface SavedSearchSettings extends RerankingDetails { + model_name: string; + model_dim: number; + normalize: boolean; + query_prefix: string; + passage_prefix: string; + index_name: string | null; + multipass_indexing: boolean; + multilingual_expansion: string[]; + disable_rerank_for_streaming: boolean; + provider_type: EmbeddingProvider | null; +} + +export interface RerankingModel { + rerank_provider_type: RerankerProvider | null; + modelName: string; + displayName: string; + description: string; + link: string; + cloud: boolean; +} + +export const rerankingModels: RerankingModel[] = [ + { + rerank_provider_type: null, + cloud: false, + modelName: "mixedbread-ai/mxbai-rerank-xsmall-v1", + displayName: "MixedBread XSmall", + description: "Fastest, smallest model for basic reranking tasks.", + link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1", + }, + { + rerank_provider_type: null, + cloud: false, + modelName: "mixedbread-ai/mxbai-rerank-base-v1", + displayName: "MixedBread Base", + description: "Balanced performance for general reranking needs.", + link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1", + }, + { + rerank_provider_type: null, + cloud: false, + modelName: "mixedbread-ai/mxbai-rerank-large-v1", + displayName: "MixedBread Large", + description: "Most powerful model for complex reranking tasks.", + link: "https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1", + }, + { + cloud: true, + rerank_provider_type: RerankerProvider.COHERE, + modelName: "rerank-english-v3.0", + displayName: "Cohere English", + description: "High-performance English-focused reranking model.", + link: "https://docs.cohere.com/docs/rerank", + }, + { + cloud: true, + rerank_provider_type: RerankerProvider.COHERE, + modelName: "rerank-multilingual-v3.0", + displayName: "Cohere Multilingual", + description: "Powerful multilingual reranking model.", + link: "https://docs.cohere.com/docs/rerank", + }, +]; diff --git a/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx b/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx new file mode 100644 index 00000000000..d6e29424114 --- /dev/null +++ b/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx @@ -0,0 +1,32 @@ +import React from "react"; +import { Modal } from "@/components/Modal"; +import { Button, Text } from "@tremor/react"; + +import { CloudEmbeddingModel } from "../../../../components/embedding/interfaces"; + +export function AlreadyPickedModal({ + model, + onClose, +}: { + model: CloudEmbeddingModel; 
+ onClose: () => void; +}) { + return ( + +
+ + You can select a different one if you want! + +
+ +
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx b/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx new file mode 100644 index 00000000000..c2f3923e5cd --- /dev/null +++ b/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx @@ -0,0 +1,232 @@ +import React, { useRef, useState } from "react"; +import { Modal } from "@/components/Modal"; +import { Button, Text, Callout, Subtitle, Divider } from "@tremor/react"; +import { Label, TextFormField } from "@/components/admin/connectors/Field"; +import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; +import { + EMBEDDING_PROVIDERS_ADMIN_URL, + LLM_PROVIDERS_ADMIN_URL, +} from "../../configuration/llm/constants"; +import { mutate } from "swr"; + +export function ChangeCredentialsModal({ + provider, + onConfirm, + onCancel, + onDeleted, + useFileUpload, +}: { + provider: CloudEmbeddingProvider; + onConfirm: () => void; + onCancel: () => void; + onDeleted: () => void; + useFileUpload: boolean; +}) { + const [apiKey, setApiKey] = useState(""); + const [testError, setTestError] = useState(""); + const [fileName, setFileName] = useState(""); + const fileInputRef = useRef(null); + const [isProcessing, setIsProcessing] = useState(false); + const [deletionError, setDeletionError] = useState(""); + + const clearFileInput = () => { + setFileName(""); + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + }; + + const handleFileUpload = async ( + event: React.ChangeEvent + ) => { + const file = event.target.files?.[0]; + setFileName(""); + + if (file) { + setFileName(file.name); + try { + setDeletionError(""); + const fileContent = await file.text(); + let jsonContent; + try { + jsonContent = JSON.parse(fileContent); + setApiKey(JSON.stringify(jsonContent)); + } catch (parseError) { + throw new Error( + "Failed to parse JSON file. Please ensure it's a valid JSON." + ); + } + } catch (error) { + setTestError( + error instanceof Error + ? error.message + : "An unknown error occurred while processing the file." + ); + setApiKey(""); + clearFileInput(); + } + } + }; + + const handleDelete = async () => { + setDeletionError(""); + setIsProcessing(true); + + try { + const response = await fetch( + `${EMBEDDING_PROVIDERS_ADMIN_URL}/${provider.provider_type}`, + { + method: "DELETE", + } + ); + + if (!response.ok) { + const errorData = await response.json(); + setDeletionError(errorData.detail); + return; + } + + mutate(LLM_PROVIDERS_ADMIN_URL); + onDeleted(); + } catch (error) { + setDeletionError( + error instanceof Error ? 
error.message : "An unknown error occurred" + ); + } finally { + setIsProcessing(false); + } + }; + + const handleSubmit = async () => { + setTestError(""); + try { + const testResponse = await fetch("/api/admin/embedding/test-embedding", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + provider_type: provider.provider_type.toLowerCase().split(" ")[0], + api_key: apiKey, + }), + }); + + if (!testResponse.ok) { + const errorMsg = (await testResponse.json()).detail; + throw new Error(errorMsg); + } + + const updateResponse = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + provider_type: provider.provider_type.toLowerCase().split(" ")[0], + api_key: apiKey, + is_default_provider: false, + is_configured: true, + }), + }); + + if (!updateResponse.ok) { + const errorData = await updateResponse.json(); + throw new Error( + errorData.detail || "Failed to update provider- check your API key" + ); + } + + onConfirm(); + } catch (error) { + setTestError( + error instanceof Error ? error.message : "An unknown error occurred" + ); + } + }; + + return ( + +
+ + Want to swap out your key? + + + Visit API + + +
+ {useFileUpload ? ( + <> + + + {fileName &&

Uploaded file: {fileName}

} + + ) : ( + <> + setApiKey(e.target.value)} + placeholder="Paste your API key here" + /> + + )} +
+ + {testError && ( + + {testError} + + )} + +
+ +
+ + + + You can also delete your key. + + + This is only possible if you have already switched to a different + embedding type! + + + + {deletionError && ( + + {deletionError} + + )} +
+
+ ); +} diff --git a/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx b/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx new file mode 100644 index 00000000000..2d77d0febd3 --- /dev/null +++ b/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx @@ -0,0 +1,42 @@ +import React from "react"; +import { Modal } from "@/components/Modal"; +import { Button, Text, Callout } from "@tremor/react"; +import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; + +export function DeleteCredentialsModal({ + modelProvider, + onConfirm, + onCancel, +}: { + modelProvider: CloudEmbeddingProvider; + onConfirm: () => void; + onCancel: () => void; +}) { + return ( + +
+ + You're about to delete your {modelProvider.provider_type}{" "} + credentials. Are you sure? + + +
+ + +
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx b/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx new file mode 100644 index 00000000000..3f59de11657 --- /dev/null +++ b/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx @@ -0,0 +1,64 @@ +import { Modal } from "@/components/Modal"; +import { Button, Text, Callout } from "@tremor/react"; +import { + EmbeddingModelDescriptor, + HostedEmbeddingModel, +} from "../../../../components/embedding/interfaces"; + +export function ModelSelectionConfirmationModal({ + selectedModel, + isCustom, + onConfirm, + onCancel, +}: { + selectedModel: HostedEmbeddingModel; + isCustom: boolean; + onConfirm: () => void; + onCancel: () => void; +}) { + return ( + +
+
+ + You have selected: {selectedModel.model_name}. Are you sure + you want to update to this new embedding model? + + + We will re-index all your documents in the background so you will be + able to continue to use Danswer as normal with the old model in the + meantime. Depending on how many documents you have indexed, this may + take a while. + + + NOTE: this re-indexing process will consume more resources + than normal. If you are self-hosting, we recommend that you allocate + at least 16GB of RAM to Danswer during this process. + + + {isCustom && ( + + We've detected that this is a custom-specified embedding + model. Since we have to download the model files before verifying + the configuration's correctness, we won't be able to let + you know if the configuration is valid until after we start + re-indexing your documents. If there is an issue, it will show up + as an indexing error on this page after clicking + Confirm. + + )} + +
+ +
+
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx b/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx new file mode 100644 index 00000000000..4b2ad9c51fc --- /dev/null +++ b/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx @@ -0,0 +1,233 @@ +import React, { useRef, useState } from "react"; +import { Text, Button, Callout } from "@tremor/react"; +import { Formik, Form, Field } from "formik"; +import * as Yup from "yup"; +import { Label, TextFormField } from "@/components/admin/connectors/Field"; +import { LoadingAnimation } from "@/components/Loading"; +import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; +import { EMBEDDING_PROVIDERS_ADMIN_URL } from "../../configuration/llm/constants"; +import { Modal } from "@/components/Modal"; + +export function ProviderCreationModal({ + selectedProvider, + onConfirm, + onCancel, + existingProvider, +}: { + selectedProvider: CloudEmbeddingProvider; + onConfirm: () => void; + onCancel: () => void; + existingProvider?: CloudEmbeddingProvider; +}) { + const useFileUpload = selectedProvider.provider_type == "Google"; + + const [isProcessing, setIsProcessing] = useState(false); + const [errorMsg, setErrorMsg] = useState(""); + const [fileName, setFileName] = useState(""); + + const initialValues = { + provider_type: + existingProvider?.provider_type || selectedProvider.provider_type, + api_key: existingProvider?.api_key || "", + custom_config: existingProvider?.custom_config + ? Object.entries(existingProvider.custom_config) + : [], + model_id: 0, + }; + + const validationSchema = Yup.object({ + provider_type: Yup.string().required("Provider type is required"), + api_key: useFileUpload + ? Yup.string() + : Yup.string().required("API Key is required"), + custom_config: Yup.array().of(Yup.array().of(Yup.string()).length(2)), + }); + + const fileInputRef = useRef(null); + + const handleFileUpload = async ( + event: React.ChangeEvent, + setFieldValue: (field: string, value: any) => void + ) => { + const file = event.target.files?.[0]; + setFileName(""); + if (file) { + setFileName(file.name); + try { + const fileContent = await file.text(); + let jsonContent; + try { + jsonContent = JSON.parse(fileContent); + } catch (parseError) { + throw new Error( + "Failed to parse JSON file. Please ensure it's a valid JSON." 
+ ); + } + setFieldValue("api_key", JSON.stringify(jsonContent)); + } catch (error) { + setFieldValue("api_key", ""); + } + } + }; + + const handleSubmit = async ( + values: any, + { setSubmitting }: { setSubmitting: (isSubmitting: boolean) => void } + ) => { + setIsProcessing(true); + setErrorMsg(""); + try { + const customConfig = Object.fromEntries(values.custom_config); + + const initialResponse = await fetch( + "/api/admin/embedding/test-embedding", + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + provider_type: values.provider_type.toLowerCase().split(" ")[0], + api_key: values.api_key, + }), + } + ); + + if (!initialResponse.ok) { + const errorMsg = (await initialResponse.json()).detail; + setErrorMsg(errorMsg); + setIsProcessing(false); + setSubmitting(false); + return; + } + + const response = await fetch(EMBEDDING_PROVIDERS_ADMIN_URL, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + ...values, + provider_type: values.provider_type.toLowerCase().split(" ")[0], + custom_config: customConfig, + is_default_provider: false, + is_configured: true, + }), + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error( + errorData.detail || "Failed to update provider- check your API key" + ); + } + + onConfirm(); + } catch (error: unknown) { + if (error instanceof Error) { + setErrorMsg(error.message); + } else { + setErrorMsg("An unknown error occurred"); + } + } finally { + setIsProcessing(false); + setSubmitting(false); + } + }; + + return ( + +
+ + {({ + values, + errors, + touched, + isSubmitting, + handleSubmit, + setFieldValue, + }) => ( +
+ + You are setting the credentials for this provider. To access + this information, follow the instructions{" "} + + here + {" "} + and gather your{" "} + + API KEY + + + +
+ {useFileUpload ? ( + <> + + handleFileUpload(e, setFieldValue)} + className="text-lg w-full p-1" + /> + {fileName &&

Uploaded file: {fileName}

} + + ) : ( + + )} + + + Learn more here + +
+ + {errorMsg && ( + + {errorMsg} + + )} + + +
+ )} +
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/modals/SelectModelModal.tsx b/web/src/app/admin/embeddings/modals/SelectModelModal.tsx new file mode 100644 index 00000000000..ed84fdc93cb --- /dev/null +++ b/web/src/app/admin/embeddings/modals/SelectModelModal.tsx @@ -0,0 +1,37 @@ +import React from "react"; +import { Modal } from "@/components/Modal"; +import { Button, Text } from "@tremor/react"; +import { CloudEmbeddingModel } from "../../../../components/embedding/interfaces"; + +export function SelectModelModal({ + model, + onConfirm, + onCancel, +}: { + model: CloudEmbeddingModel; + onConfirm: () => void; + onCancel: () => void; +}) { + return ( + +
+ + You're selecting a new embedding model, {model.model_name}. If + you update to this model, you will need to undergo a complete + re-indexing. +
+ Are you sure? +
+
+ +
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/page.tsx b/web/src/app/admin/embeddings/page.tsx new file mode 100644 index 00000000000..137602fa14d --- /dev/null +++ b/web/src/app/admin/embeddings/page.tsx @@ -0,0 +1,18 @@ +"use client"; + +import { EmbeddingFormProvider } from "@/components/context/EmbeddingContext"; +import EmbeddingSidebar from "../../../components/embedding/EmbeddingSidebar"; +import EmbeddingForm from "./pages/EmbeddingFormPage"; + +export default function EmbeddingWrapper() { + return ( + +
+ +
+ +
+
+
+ ); +} diff --git a/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx b/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx new file mode 100644 index 00000000000..89d885a1368 --- /dev/null +++ b/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx @@ -0,0 +1,172 @@ +import React, { Dispatch, forwardRef, SetStateAction } from "react"; +import { Formik, Form, FormikProps, FieldArray, Field } from "formik"; +import * as Yup from "yup"; +import CredentialSubText from "@/components/credentials/CredentialFields"; +import { TrashIcon } from "@/components/icons/icons"; +import { FaPlus } from "react-icons/fa"; +import { AdvancedSearchConfiguration, RerankingDetails } from "../interfaces"; +import { BooleanFormField } from "@/components/admin/connectors/Field"; +import NumberInput from "../../connectors/[connector]/pages/ConnectorInput/NumberInput"; + +interface AdvancedEmbeddingFormPageProps { + updateAdvancedEmbeddingDetails: ( + key: keyof AdvancedSearchConfiguration, + value: any + ) => void; + advancedEmbeddingDetails: AdvancedSearchConfiguration; + numRerank: number; +} + +const AdvancedEmbeddingFormPage = forwardRef< + FormikProps, + AdvancedEmbeddingFormPageProps +>( + ( + { updateAdvancedEmbeddingDetails, advancedEmbeddingDetails, numRerank }, + ref + ) => { + return ( +
+

+ Advanced Configuration +

+ { + setSubmitting(false); + }} + enableReinitialize={true} + > + {({ values, setFieldValue }) => ( +
+ + {({ push, remove }) => ( +
+ + + List of languages for multilingual expansion. Leave empty + for no additional expansion. + + {values.multilingual_expansion.map( + (_: any, index: number) => ( +
+ + ) => { + const newValue = [ + ...values.multilingual_expansion, + ]; + newValue[index] = e.target.value; + setFieldValue("multilingual_expansion", newValue); + updateAdvancedEmbeddingDetails( + "multilingual_expansion", + newValue + ); + }} + value={values.multilingual_expansion[index]} + /> + + +
+ ) + )} + + +
+ )} +
+ + ) => { + const checked = e.target.checked; + updateAdvancedEmbeddingDetails("multipass_indexing", checked); + setFieldValue("multipass_indexing", checked); + }} + label="Multipass Indexing" + name="multipassIndexing" + /> + ) => { + const checked = e.target.checked; + updateAdvancedEmbeddingDetails( + "disable_rerank_for_streaming", + checked + ); + setFieldValue("disable_rerank_for_streaming", checked); + }} + label="Disable Rerank for Streaming" + name="disableRerankForStreaming" + /> + + + )} +
+
+ ); + } +); + +AdvancedEmbeddingFormPage.displayName = "AdvancedEmbeddingFormPage"; +export default AdvancedEmbeddingFormPage; diff --git a/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx b/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx new file mode 100644 index 00000000000..a7a7a1553a5 --- /dev/null +++ b/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx @@ -0,0 +1,199 @@ +"use client"; + +import { Text, Title } from "@tremor/react"; + +import { + CloudEmbeddingProvider, + CloudEmbeddingModel, + AVAILABLE_CLOUD_PROVIDERS, + CloudEmbeddingProviderFull, + EmbeddingModelDescriptor, +} from "../../../../components/embedding/interfaces"; +import { EmbeddingDetails } from "../EmbeddingModelSelectionForm"; +import { FiExternalLink, FiInfo } from "react-icons/fi"; +import { HoverPopup } from "@/components/HoverPopup"; +import { Dispatch, SetStateAction } from "react"; + +export default function CloudEmbeddingPage({ + currentModel, + embeddingProviderDetails, + newEnabledProviders, + newUnenabledProviders, + setShowTentativeProvider, + setChangeCredentialsProvider, + setAlreadySelectedModel, + setShowTentativeModel, + setShowModelInQueue, +}: { + setShowModelInQueue: Dispatch>; + setShowTentativeModel: Dispatch>; + currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel; + setAlreadySelectedModel: Dispatch>; + newUnenabledProviders: string[]; + embeddingProviderDetails?: EmbeddingDetails[]; + newEnabledProviders: string[]; + setShowTentativeProvider: React.Dispatch< + React.SetStateAction + >; + setChangeCredentialsProvider: React.Dispatch< + React.SetStateAction + >; +}) { + function hasProviderTypeinArray( + arr: Array<{ provider_type: string }>, + searchName: string + ): boolean { + return arr.some( + (item) => item.provider_type.toLowerCase() === searchName.toLowerCase() + ); + } + + let providers: CloudEmbeddingProviderFull[] = AVAILABLE_CLOUD_PROVIDERS.map( + (model) => ({ + ...model, + configured: + !newUnenabledProviders.includes(model.provider_type) && + (newEnabledProviders.includes(model.provider_type) || + (embeddingProviderDetails && + hasProviderTypeinArray( + embeddingProviderDetails, + model.provider_type + ))!), + }) + ); + + return ( +
+ + Here are some cloud-based models to choose from. + + + These models require API keys and run in the clouds of the respective + providers. + + +
+ {providers.map((provider) => ( +
+
+ {provider.icon({ size: 40 })} +

+ {provider.provider_type}{" "} + {provider.provider_type == "Cohere" && "(recommended)"} +

+ + } + popupContent={ +
+
{provider.description}
+
+ } + style="dark" + /> +
+ + +
+ {provider.embedding_models.map((model) => ( + + ))} +
+
+ ))} +
+
+ ); +} + +export function CloudModelCard({ + model, + provider, + currentModel, + setAlreadySelectedModel, + setShowTentativeModel, + setShowModelInQueue, + setShowTentativeProvider, +}: { + model: CloudEmbeddingModel; + provider: CloudEmbeddingProviderFull; + currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel; + setAlreadySelectedModel: Dispatch>; + setShowTentativeModel: Dispatch>; + setShowModelInQueue: Dispatch>; + setShowTentativeProvider: React.Dispatch< + React.SetStateAction + >; +}) { + const enabled = model.model_name === currentModel.model_name; + + return ( +
+ +

{model.description}

+
+ ${model.pricePerMillion}/M tokens +
+
+ +
+
+ ); +} diff --git a/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx new file mode 100644 index 00000000000..5aeb515838d --- /dev/null +++ b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx @@ -0,0 +1,441 @@ +"use client"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; + +import { EmbeddingModelSelection } from "../EmbeddingModelSelectionForm"; +import { useEffect, useState } from "react"; +import { Button, Card, Text } from "@tremor/react"; +import { ArrowLeft, ArrowRight, WarningCircle } from "@phosphor-icons/react"; +import { + CloudEmbeddingModel, + EmbeddingProvider, + HostedEmbeddingModel, +} from "../../../../components/embedding/interfaces"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import useSWR, { mutate } from "swr"; +import { ThreeDotsLoader } from "@/components/Loading"; +import AdvancedEmbeddingFormPage from "./AdvancedEmbeddingFormPage"; +import { + AdvancedSearchConfiguration, + RerankerProvider, + RerankingDetails, + SavedSearchSettings, +} from "../interfaces"; +import RerankingDetailsForm from "../RerankingFormPage"; +import { useEmbeddingFormContext } from "@/components/context/EmbeddingContext"; +import { Modal } from "@/components/Modal"; + +export default function EmbeddingForm() { + const { formStep, nextFormStep, prevFormStep } = useEmbeddingFormContext(); + const { popup, setPopup } = usePopup(); + + const [advancedEmbeddingDetails, setAdvancedEmbeddingDetails] = + useState({ + model_name: "", + model_dim: 0, + normalize: false, + query_prefix: "", + passage_prefix: "", + index_name: "", + multipass_indexing: true, + multilingual_expansion: [], + disable_rerank_for_streaming: false, + }); + + const [rerankingDetails, setRerankingDetails] = useState({ + rerank_api_key: "", + num_rerank: 0, + rerank_provider_type: null, + rerank_model_name: "", + }); + + const updateAdvancedEmbeddingDetails = ( + key: keyof AdvancedSearchConfiguration, + value: any + ) => { + setAdvancedEmbeddingDetails((values) => ({ ...values, [key]: value })); + }; + + async function updateSearchSettings(searchSettings: SavedSearchSettings) { + const response = await fetch( + "/api/search-settings/update-inference-settings", + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + ...searchSettings, + }), + } + ); + return response; + } + + const updateSelectedProvider = ( + model: CloudEmbeddingModel | HostedEmbeddingModel + ) => { + setSelectedProvider(model); + }; + const [displayPoorModelName, setDisplayPoorModelName] = useState(true); + const [showPoorModel, setShowPoorModel] = useState(false); + const [modelTab, setModelTab] = useState<"open" | "cloud" | null>(null); + + const { + data: currentEmbeddingModel, + isLoading: isLoadingCurrentModel, + error: currentEmbeddingModelError, + } = useSWR( + "/api/search-settings/get-current-search-settings", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + const [selectedProvider, setSelectedProvider] = useState< + CloudEmbeddingModel | HostedEmbeddingModel | null + >(currentEmbeddingModel!); + + const { data: searchSettings, isLoading: isLoadingSearchSettings } = + useSWR( + "/api/search-settings/get-current-search-settings", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + + useEffect(() => { + if (searchSettings) { + 
setAdvancedEmbeddingDetails({ + model_name: searchSettings.model_name, + model_dim: searchSettings.model_dim, + normalize: searchSettings.normalize, + query_prefix: searchSettings.query_prefix, + passage_prefix: searchSettings.passage_prefix, + index_name: searchSettings.index_name, + multipass_indexing: searchSettings.multipass_indexing, + multilingual_expansion: searchSettings.multilingual_expansion, + disable_rerank_for_streaming: + searchSettings.disable_rerank_for_streaming, + }); + setRerankingDetails({ + rerank_api_key: searchSettings.rerank_api_key, + num_rerank: searchSettings.num_rerank, + rerank_provider_type: searchSettings.rerank_provider_type, + rerank_model_name: searchSettings.rerank_model_name, + }); + } + }, [searchSettings]); + + const originalRerankingDetails: RerankingDetails = searchSettings + ? { + rerank_api_key: searchSettings.rerank_api_key, + num_rerank: searchSettings.num_rerank, + rerank_provider_type: searchSettings.rerank_provider_type, + rerank_model_name: searchSettings.rerank_model_name, + } + : { + rerank_api_key: "", + num_rerank: 0, + rerank_provider_type: null, + rerank_model_name: "", + }; + + useEffect(() => { + if (currentEmbeddingModel) { + setSelectedProvider(currentEmbeddingModel); + } + }, [currentEmbeddingModel]); + + useEffect(() => { + if (currentEmbeddingModel) { + setSelectedProvider(currentEmbeddingModel); + } + }, [currentEmbeddingModel]); + if (!selectedProvider) { + return ; + } + if (currentEmbeddingModelError || !currentEmbeddingModel) { + return ; + } + + const updateSearch = async () => { + let values: SavedSearchSettings = { + ...rerankingDetails, + ...advancedEmbeddingDetails, + provider_type: + selectedProvider.provider_type?.toLowerCase() as EmbeddingProvider | null, + }; + + const response = await updateSearchSettings(values); + if (response.ok) { + setPopup({ + message: "Updated search settings successfully", + type: "success", + }); + mutate("/api/search-settings/get-current-search-settings"); + return true; + } else { + setPopup({ message: "Failed to update search settings", type: "error" }); + return false; + } + }; + + const onConfirm = async () => { + if (!selectedProvider) { + return; + } + let newModel: SavedSearchSettings; + + if (selectedProvider.provider_type != null) { + // This is a cloud model + newModel = { + ...advancedEmbeddingDetails, + ...selectedProvider, + ...rerankingDetails, + model_name: selectedProvider.model_name, + provider_type: + (selectedProvider.provider_type + ?.toLowerCase() + .split(" ")[0] as EmbeddingProvider) || null, + }; + } else { + // This is a locally hosted model + newModel = { + ...advancedEmbeddingDetails, + ...selectedProvider, + ...rerankingDetails, + model_name: selectedProvider.model_name!, + provider_type: null, + }; + } + newModel.index_name = null; + + const response = await fetch( + "/api/search-settings/set-new-search-settings", + { + method: "POST", + body: JSON.stringify(newModel), + headers: { + "Content-Type": "application/json", + }, + } + ); + if (response.ok) { + setPopup({ + message: "Changed provider successfully. 
Redirecting to embedding page", + type: "success", + }); + mutate("/api/search-settings/get-secondary-search-settings"); + setTimeout(() => { + window.open("/admin/configuration/search", "_self"); + }, 2000); + } else { + setPopup({ message: "Failed to update embedding model", type: "error" }); + + alert(`Failed to update embedding model - ${await response.text()}`); + } + }; + + const needsReIndex = + currentEmbeddingModel != selectedProvider || + searchSettings?.multipass_indexing != + advancedEmbeddingDetails.multipass_indexing; + + const ReIndexingButton = ({ needsReIndex }: { needsReIndex: boolean }) => { + return needsReIndex ? ( +
+ +
+ +
+

Needs re-indexing due to:

+
    + {currentEmbeddingModel != selectedProvider && ( +
  • Changed embedding provider
  • + )} + {searchSettings?.multipass_indexing != + advancedEmbeddingDetails.multipass_indexing && ( +
  • Multipass indexing modification
  • + )} +
+
+
+
+ ) : ( + + ); + }; + + return ( +
+ {popup} + +
+ +
+
+ {formStep == 0 && ( + <> +

+ Select an Embedding Model +

+ + Note that updating the backing model will require a complete + re-indexing of all documents across every connected source. This + is taken care of in the background so that the system can continue + to be used, but depending on the size of the corpus, this could + take hours or days. You can monitor the progress of the + re-indexing on this page while the models are being switched. + + + + +
+ +
+ + )} + {showPoorModel && ( + setShowPoorModel(false)} + width="max-w-3xl" + title={`Are you sure you want to select ${selectedProvider.model_name}?`} + > + <> +
+ {selectedProvider.model_name} is a lower-accuracy model. +
+ We recommend the following alternatives. +
  • Cohere embed-english-v3.0 for cloud-based
  • +
  • Nomic nomic-embed-text-v1 for self-hosted
  • +
    +
    + + +
    + +
    + )} + + {formStep == 1 && ( + <> + + + + +
    + + + + +
    + +
    +
    + + )} + {formStep == 2 && ( + <> + + + + +
    + + + +
    + + )} +
    +
    + ); +} diff --git a/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx b/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx new file mode 100644 index 00000000000..2e28ce8e4b8 --- /dev/null +++ b/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx @@ -0,0 +1,69 @@ +"use client"; +import { Button, Card, Text } from "@tremor/react"; +import { ModelSelector } from "../../../../components/embedding/ModelSelector"; +import { + AVAILABLE_MODELS, + CloudEmbeddingModel, + HostedEmbeddingModel, +} from "../../../../components/embedding/interfaces"; +import { CustomModelForm } from "../../../../components/embedding/CustomModelForm"; +import { useState } from "react"; +import { Title } from "@tremor/react"; +export default function OpenEmbeddingPage({ + onSelectOpenSource, + selectedProvider, +}: { + onSelectOpenSource: (model: HostedEmbeddingModel) => Promise; + selectedProvider: HostedEmbeddingModel | CloudEmbeddingModel; +}) { + const [configureModel, setConfigureModel] = useState(false); + return ( +
    + + Here are some locally-hosted models to choose from. + + + These models can be used without any API keys, and can leverage a GPU + for faster inference. + + + + + Alternatively, (if you know what you're doing) you can specify a{" "} + + SentenceTransformers + + -compatible model of your choice below. The rough list of supported + models can be found{" "} + + here + + . +
    + NOTE: not all models listed will work with Danswer, since some + have unique interfaces or special requirements. If in doubt, reach out + to the Danswer team. +
    + {!configureModel && ( + + )} + {configureModel && ( +
    + + + +
    + )} +
    + ); +} diff --git a/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx b/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx new file mode 100644 index 00000000000..6ee8efef511 --- /dev/null +++ b/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx @@ -0,0 +1,189 @@ +"use client"; + +import { Modal } from "@/components/Modal"; +import { PageSelector } from "@/components/PageSelector"; +import { CheckmarkIcon, CopyIcon } from "@/components/icons/icons"; +import { localizeAndPrettify } from "@/lib/time"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeaderCell, + TableRow, + Text, +} from "@tremor/react"; +import { useState } from "react"; +import { IndexAttemptError } from "./types"; + +const NUM_IN_PAGE = 8; + +export function CustomModal({ + isVisible, + onClose, + title, + content, + showCopyButton = false, +}: { + isVisible: boolean; + onClose: () => void; + title: string; + content: string; + showCopyButton?: boolean; +}) { + const [copyClicked, setCopyClicked] = useState(false); + + if (!isVisible) return null; + + return ( + +
    + {showCopyButton && ( +
    + {!copyClicked ? ( +
    { + navigator.clipboard.writeText(content); + setCopyClicked(true); + setTimeout(() => setCopyClicked(false), 2000); + }} + className="flex w-fit cursor-pointer hover:bg-hover-light p-2 border-border border rounded" + > + Copy full content + +
    + ) : ( +
    + Copied to clipboard + +
    + )} +
    + )} +
    {content}
    +
    +
    + ); +} + +export function IndexAttemptErrorsTable({ + indexAttemptErrors, +}: { + indexAttemptErrors: IndexAttemptError[]; +}) { + const [page, setPage] = useState(1); + const [modalData, setModalData] = useState<{ + id: number | null; + title: string; + content: string; + } | null>(null); + const closeModal = () => setModalData(null); + + return ( + <> + {modalData && ( + + )} + + + + + Timestamp + Batch Number + Document Summaries + Error Message + + + + {indexAttemptErrors + .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page) + .map((indexAttemptError) => { + return ( + + + {indexAttemptError.time_created + ? localizeAndPrettify(indexAttemptError.time_created) + : "-"} + + {indexAttemptError.batch_number} + + {indexAttemptError.doc_summaries && ( +
    + setModalData({ + id: indexAttemptError.id, + title: "Document Summaries", + content: JSON.stringify( + indexAttemptError.doc_summaries, + null, + 2 + ), + }) + } + className="mt-2 text-link cursor-pointer select-none" + > + View Document Summaries +
    + )} +
    + +
    + + {indexAttemptError.error_msg || "-"} + + {indexAttemptError.traceback && ( +
    + setModalData({ + id: indexAttemptError.id, + title: "Exception Traceback", + content: indexAttemptError.traceback!, + }) + } + className="mt-2 text-link cursor-pointer select-none" + > + View Full Trace +
    + )} +
    +
    +
    + ); + })} +
    +
    + {indexAttemptErrors.length > NUM_IN_PAGE && ( +
    +
    + { + setPage(newPage); + window.scrollTo({ + top: 0, + left: 0, + behavior: "smooth", + }); + }} + /> +
    +
    + )} + + ); +} diff --git a/web/src/app/admin/indexing/[id]/lib.ts b/web/src/app/admin/indexing/[id]/lib.ts new file mode 100644 index 00000000000..f81f95d8c2f --- /dev/null +++ b/web/src/app/admin/indexing/[id]/lib.ts @@ -0,0 +1,3 @@ +export function buildIndexingErrorsUrl(id: string | number) { + return `/api/manage/admin/indexing-errors/${id}`; +} diff --git a/web/src/app/admin/indexing/[id]/page.tsx b/web/src/app/admin/indexing/[id]/page.tsx new file mode 100644 index 00000000000..51fe694541c --- /dev/null +++ b/web/src/app/admin/indexing/[id]/page.tsx @@ -0,0 +1,58 @@ +"use client"; + +import { BackButton } from "@/components/BackButton"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ValidSources } from "@/lib/types"; +import { Title } from "@tremor/react"; +import useSWR from "swr"; +import { IndexAttemptErrorsTable } from "./IndexAttemptErrorsTable"; +import { buildIndexingErrorsUrl } from "./lib"; +import { IndexAttemptError } from "./types"; + +function Main({ id }: { id: number }) { + const { + data: indexAttemptErrors, + isLoading, + error, + } = useSWR( + buildIndexingErrorsUrl(id), + errorHandlingFetcher + ); + + if (isLoading) { + return ; + } + + if (error || !indexAttemptErrors) { + return ( + + ); + } + + return ( + <> + +
    +
    + Indexing Errors for Attempt {id} +
    + +
    + + ); +} + +export default function Page({ params }: { params: { id: string } }) { + const id = parseInt(params.id); + + return ( +
    +
    +
    + ); +} diff --git a/web/src/app/admin/indexing/[id]/types.ts b/web/src/app/admin/indexing/[id]/types.ts new file mode 100644 index 00000000000..66480805f58 --- /dev/null +++ b/web/src/app/admin/indexing/[id]/types.ts @@ -0,0 +1,15 @@ +export interface IndexAttemptError { + id: number; + index_attempt_id: number; + batch_number: number; + doc_summaries: DocumentErrorSummary[]; + error_msg: string; + traceback: string; + time_created: string; +} + +export interface DocumentErrorSummary { + id: string; + semantic_id: string; + section_link: string; +} diff --git a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx index 86ca489b5ee..a797933ced7 100644 --- a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx +++ b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx @@ -1,185 +1,557 @@ -"use client"; - +import React, { useState, useMemo, useEffect, useRef } from "react"; import { Table, - TableHead, TableRow, TableHeaderCell, TableBody, TableCell, + Badge, + Button, } from "@tremor/react"; -import { CCPairStatus, IndexAttemptStatus } from "@/components/Status"; -import { useEffect, useState } from "react"; -import { PageSelector } from "@/components/PageSelector"; +import { IndexAttemptStatus } from "@/components/Status"; import { timeAgo } from "@/lib/time"; -import { ConnectorIndexingStatus } from "@/lib/types"; -import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; -import { getDocsProcessedPerMinute } from "@/lib/indexAttempt"; +import { + ConnectorIndexingStatus, + ConnectorSummary, + GroupedConnectorSummaries, + ValidSources, +} from "@/lib/types"; import { useRouter } from "next/navigation"; -import { isCurrentlyDeleting } from "@/lib/documentDeletion"; -import { FiCheck, FiEdit2, FiXCircle } from "react-icons/fi"; +import { + FiChevronDown, + FiChevronRight, + FiSettings, + FiLock, + FiUnlock, +} from "react-icons/fi"; +import { Tooltip } from "@/components/tooltip/Tooltip"; +import { SourceIcon } from "@/components/SourceIcon"; +import { getSourceDisplayName } from "@/lib/sources"; +import { CustomTooltip } from "@/components/tooltip/CustomTooltip"; +import { Warning } from "@phosphor-icons/react"; +import Cookies from "js-cookie"; +import { TOGGLED_CONNECTORS_COOKIE_NAME } from "@/lib/constants"; +import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import { ConnectorCredentialPairStatus } from "../../connector/[ccPairId]/types"; -const NUM_IN_PAGE = 20; +const columnWidths = { + first: "20%", + second: "15%", + third: "15%", + fourth: "15%", + fifth: "15%", + sixth: "15%", + seventh: "5%", +}; -function CCPairIndexingStatusDisplay({ - ccPairsIndexingStatus, +function SummaryRow({ + source, + summary, + isOpen, + onToggle, }: { - ccPairsIndexingStatus: ConnectorIndexingStatus; + source: ValidSources; + summary: ConnectorSummary; + isOpen: boolean; + onToggle: () => void; }) { - if (ccPairsIndexingStatus.connector.disabled) { - return ( - - ); - } + const activePercentage = (summary.active / summary.count) * 100; + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); - const docsPerMinute = getDocsProcessedPerMinute( - ccPairsIndexingStatus.latest_index_attempt - )?.toFixed(2); return ( - <> - - {ccPairsIndexingStatus?.latest_index_attempt?.new_docs_indexed && - ccPairsIndexingStatus?.latest_index_attempt?.status === "in_progress" ? ( -
    -
    - Current Run:{" "} - {ccPairsIndexingStatus.latest_index_attempt.new_docs_indexed} docs - indexed -
    -
    - Speed:{" "} - {docsPerMinute ? ( - <>{docsPerMinute} docs / min + + +
    +
    + {isOpen ? ( + ) : ( - "calculating rate..." + )}
    + + {getSourceDisplayName(source)}
    - ) : null} - +
    + + +
    Total Connectors
    +
    {summary.count}
    +
    + + +
    Active Connectors
    + +
    +
    +
    +
    + + {summary.active} ({activePercentage.toFixed(0)}%) + +
    +
    +
    + + {isPaidEnterpriseFeaturesEnabled && ( + +
    Public Connectors
    +

    + {summary.public}/{summary.count} +

    +
    + )} + + +
    Total Docs Indexed
    +
    + {summary.totalDocsIndexed.toLocaleString()} +
    +
    + + +
    Errors
    + +
    + {summary.errors > 0 && } + {summary.errors} +
    +
    + + +
    ); } -function ClickableTableRow({ - url, - children, - ...props +function ConnectorRow({ + ccPairsIndexingStatus, + invisible, + isEditable, }: { - url: string; - children: React.ReactNode; - [key: string]: any; // This allows for any additional props + ccPairsIndexingStatus: ConnectorIndexingStatus; + invisible?: boolean; + isEditable: boolean; }) { const router = useRouter(); + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); - useEffect(() => { - router.prefetch(url); - }, [router]); + const handleManageClick = (e: any) => { + e.stopPropagation(); + router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`); + }; + + const getActivityBadge = () => { + if ( + ccPairsIndexingStatus.cc_pair_status === + ConnectorCredentialPairStatus.DELETING + ) { + return ( + +
    +
    + Deleting +
    +
    + ); + } else if ( + ccPairsIndexingStatus.cc_pair_status === + ConnectorCredentialPairStatus.PAUSED + ) { + return ( + +
    +
    + Paused +
    +
    + ); + } - const navigate = () => { - router.push(url); + // ACTIVE case + switch (ccPairsIndexingStatus.last_status) { + case "in_progress": + return ( + +
    +
    + Indexing +
    +
    + ); + case "not_started": + return ( + +
    +
    + Scheduled +
    +
    + ); + default: + return ( + +
    +
    + Active +
    +
    + ); + } }; return ( - - {children} + { + router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`); + }} + > + +

    + {ccPairsIndexingStatus.name} +

    +
    + + {timeAgo(ccPairsIndexingStatus?.last_success) || "-"} + + + {getActivityBadge()} + + {isPaidEnterpriseFeaturesEnabled && ( + + {ccPairsIndexingStatus.public_doc ? ( + + Public + + ) : ( + + Private + + )} + + )} + + {ccPairsIndexingStatus.docs_indexed} + + + + + + {isEditable && ( + + + + )} +
    ); } export function CCPairIndexingStatusTable({ ccPairsIndexingStatuses, + editableCcPairsIndexingStatuses, }: { ccPairsIndexingStatuses: ConnectorIndexingStatus[]; + editableCcPairsIndexingStatuses: ConnectorIndexingStatus[]; }) { - const [page, setPage] = useState(1); - const ccPairsIndexingStatusesForPage = ccPairsIndexingStatuses.slice( - NUM_IN_PAGE * (page - 1), - NUM_IN_PAGE * page - ); + const [searchTerm, setSearchTerm] = useState(""); + + const searchInputRef = useRef(null); + const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); + + useEffect(() => { + if (searchInputRef.current) { + searchInputRef.current.focus(); + } + }, []); + + const [connectorsToggled, setConnectorsToggled] = useState< + Record + >(() => { + const savedState = Cookies.get(TOGGLED_CONNECTORS_COOKIE_NAME); + return savedState ? JSON.parse(savedState) : {}; + }); + + const { groupedStatuses, sortedSources, groupSummaries } = useMemo(() => { + const grouped: Record[]> = + {} as Record[]>; + + // First, add editable connectors + editableCcPairsIndexingStatuses.forEach((status) => { + const source = status.connector.source; + if (!grouped[source]) { + grouped[source] = []; + } + grouped[source].unshift(status); + }); + + // Then, add non-editable connectors + ccPairsIndexingStatuses.forEach((status) => { + const source = status.connector.source; + if (!grouped[source]) { + grouped[source] = []; + } + if ( + !editableCcPairsIndexingStatuses.some( + (e) => e.cc_pair_id === status.cc_pair_id + ) + ) { + grouped[source].push(status); + } + }); + + const sorted = Object.keys(grouped).sort() as ValidSources[]; + + const summaries: GroupedConnectorSummaries = + {} as GroupedConnectorSummaries; + sorted.forEach((source) => { + const statuses = grouped[source]; + summaries[source] = { + count: statuses.length, + active: statuses.filter( + (status) => + status.cc_pair_status === ConnectorCredentialPairStatus.ACTIVE + ).length, + public: statuses.filter((status) => status.public_doc).length, + totalDocsIndexed: statuses.reduce( + (sum, status) => sum + status.docs_indexed, + 0 + ), + errors: statuses.filter((status) => status.last_status === "failed") + .length, + }; + }); + + return { + groupedStatuses: grouped, + sortedSources: sorted, + groupSummaries: summaries, + }; + }, [ccPairsIndexingStatuses, editableCcPairsIndexingStatuses]); + + const toggleSource = ( + source: ValidSources, + toggled: boolean | null = null + ) => { + const newConnectorsToggled = { + ...connectorsToggled, + [source]: toggled == null ? !connectorsToggled[source] : toggled, + }; + setConnectorsToggled(newConnectorsToggled); + Cookies.set( + TOGGLED_CONNECTORS_COOKIE_NAME, + JSON.stringify(newConnectorsToggled) + ); + }; + const toggleSources = () => { + const currentToggledCount = + Object.values(connectorsToggled).filter(Boolean).length; + const shouldToggleOn = currentToggledCount < sortedSources.length / 2; + + const connectors = sortedSources.reduce( + (acc, source) => { + acc[source] = shouldToggleOn; + return acc; + }, + {} as Record + ); + + setConnectorsToggled(connectors); + Cookies.set(TOGGLED_CONNECTORS_COOKIE_NAME, JSON.stringify(connectors)); + }; + const shouldExpand = + Object.values(connectorsToggled).filter(Boolean).length < + sortedSources.length / 2; return ( -
    - - - - Connector - Status - Is Public - Last Indexed - Docs Indexed - - +
    +
    + +
    + - {ccPairsIndexingStatusesForPage.map((ccPairsIndexingStatus) => { - return ( - - -
    - -
    - -
    -
    -
    - - - - - {ccPairsIndexingStatus.public_doc ? ( - - ) : ( - - )} - - - {timeAgo(ccPairsIndexingStatus?.last_success) || "-"} - - {ccPairsIndexingStatus.docs_indexed} -
    - ); - })} -
    -
    - {ccPairsIndexingStatuses.length > NUM_IN_PAGE && ( -
    -
    - { - setPage(newPage); - window.scrollTo({ - top: 0, - left: 0, - behavior: "smooth", - }); - }} +
    + setSearchTerm(e.target.value)} + className="ml-2 w-96 h-9 flex-none rounded-md border border-border bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" /> + +
    -
    - )} + {sortedSources + .filter((source) => source != "not_applicable") + .map((source, ind) => { + const sourceMatches = source + .toLowerCase() + .includes(searchTerm.toLowerCase()); + const matchingConnectors = groupedStatuses[source].filter( + (status) => + (status.name || "") + .toLowerCase() + .includes(searchTerm.toLowerCase()) + ); + if (sourceMatches || matchingConnectors.length > 0) { + return ( + +
    + + toggleSource(source)} + /> + + {connectorsToggled[source] && ( + <> + + + Name + + + Last Indexed + + + Activity + + {isPaidEnterpriseFeaturesEnabled && ( + + Permissions + + )} + + Total Docs + + + Last Status + + + + {(sourceMatches + ? groupedStatuses[source] + : matchingConnectors + ).map((ccPairsIndexingStatus) => ( + + e.cc_pair_id === + ccPairsIndexingStatus.cc_pair_id + )} + /> + ))} + + )} + + ); + } + return null; + })} + + +
    +
    ); } diff --git a/web/src/app/admin/indexing/status/page.tsx b/web/src/app/admin/indexing/status/page.tsx index 8cebea2349a..f5d64d3ac3a 100644 --- a/web/src/app/admin/indexing/status/page.tsx +++ b/web/src/app/admin/indexing/status/page.tsx @@ -10,26 +10,35 @@ import { CCPairIndexingStatusTable } from "./CCPairIndexingStatusTable"; import { AdminPageTitle } from "@/components/admin/Title"; import Link from "next/link"; import { Button, Text } from "@tremor/react"; +import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; function Main() { const { data: indexAttemptData, isLoading: indexAttemptIsLoading, error: indexAttemptError, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status", - errorHandlingFetcher, - { refreshInterval: 10000 } // 10 seconds - ); + } = useConnectorCredentialIndexingStatus(); + const { + data: editableIndexAttemptData, + isLoading: editableIndexAttemptIsLoading, + error: editableIndexAttemptError, + } = useConnectorCredentialIndexingStatus(undefined, true); - if (indexAttemptIsLoading) { + if (indexAttemptIsLoading || editableIndexAttemptIsLoading) { return ; } - if (indexAttemptError || !indexAttemptData) { + if ( + indexAttemptError || + !indexAttemptData || + editableIndexAttemptError || + !editableIndexAttemptData + ) { return (
    - {indexAttemptError?.info?.detail || "Error loading indexing history."} + {indexAttemptError?.info?.detail || + editableIndexAttemptError?.info?.detail || + "Error loading indexing history."}
    ); } @@ -58,7 +67,10 @@ function Main() { }); return ( - + ); } diff --git a/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx b/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx deleted file mode 100644 index 7572ac2ce8f..00000000000 --- a/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx +++ /dev/null @@ -1,74 +0,0 @@ -import { Modal } from "@/components/Modal"; -import { Button, Text, Callout } from "@tremor/react"; -import { EmbeddingModelDescriptor } from "./embeddingModels"; - -export function ModelSelectionConfirmaion({ - selectedModel, - isCustom, - onConfirm, -}: { - selectedModel: EmbeddingModelDescriptor; - isCustom: boolean; - onConfirm: () => void; -}) { - return ( -
    - - You have selected: {selectedModel.model_name}. Are you sure you - want to update to this new embedding model? - - - We will re-index all your documents in the background so you will be - able to continue to use Danswer as normal with the old model in the - meantime. Depending on how many documents you have indexed, this may - take a while. - - - NOTE: this re-indexing process will consume more resources than - normal. If you are self-hosting, we recommend that you allocate at least - 16GB of RAM to Danswer during this process. - - - {isCustom && ( - - We've detected that this is a custom-specified embedding model. - Since we have to download the model files before verifying the - configuration's correctness, we won't be able to let you - know if the configuration is valid until after we start - re-indexing your documents. If there is an issue, it will show up on - this page as an indexing error on this page after clicking Confirm. - - )} - -
    - -
    -
    - ); -} - -export function ModelSelectionConfirmaionModal({ - selectedModel, - isCustom, - onConfirm, - onCancel, -}: { - selectedModel: EmbeddingModelDescriptor; - isCustom: boolean; - onConfirm: () => void; - onCancel: () => void; -}) { - return ( - -
    - -
    -
    - ); -} diff --git a/web/src/app/admin/models/embedding/ModelSelector.tsx b/web/src/app/admin/models/embedding/ModelSelector.tsx deleted file mode 100644 index bc1c8b1656f..00000000000 --- a/web/src/app/admin/models/embedding/ModelSelector.tsx +++ /dev/null @@ -1,87 +0,0 @@ -import { DefaultDropdown, StringOrNumberOption } from "@/components/Dropdown"; -import { Title, Text, Divider, Card } from "@tremor/react"; -import { - EmbeddingModelDescriptor, - FullEmbeddingModelDescriptor, -} from "./embeddingModels"; -import { FiStar } from "react-icons/fi"; -import { CustomModelForm } from "./CustomModelForm"; - -export function ModelOption({ - model, - onSelect, -}: { - model: FullEmbeddingModelDescriptor; - onSelect?: (model: EmbeddingModelDescriptor) => void; -}) { - return ( -
    -
    - {model.isDefault && } - {model.model_name} -
    -
    - {model.description - ? model.description - : "Custom model—no description is available."} -
    - {model.link && ( - - See More Details - - )} - {onSelect && ( -
    onSelect(model)} - > - Select Model -
    - )} -
    - ); -} - -export function ModelSelector({ - modelOptions, - setSelectedModel, -}: { - modelOptions: FullEmbeddingModelDescriptor[]; - setSelectedModel: (model: EmbeddingModelDescriptor) => void; -}) { - return ( -
    -
    - {modelOptions.map((modelOption) => ( - - ))} -
    -
    - ); -} diff --git a/web/src/app/admin/models/embedding/embeddingModels.ts b/web/src/app/admin/models/embedding/embeddingModels.ts deleted file mode 100644 index 7c5d09180f9..00000000000 --- a/web/src/app/admin/models/embedding/embeddingModels.ts +++ /dev/null @@ -1,87 +0,0 @@ -export interface EmbeddingModelResponse { - model_name: string | null; -} - -export interface FullEmbeddingModelResponse { - current_model_name: string; - secondary_model_name: string | null; -} - -export interface EmbeddingModelDescriptor { - model_name: string; - model_dim: number; - normalize: boolean; - query_prefix?: string; - passage_prefix?: string; -} - -export interface FullEmbeddingModelDescriptor extends EmbeddingModelDescriptor { - description: string; - isDefault?: boolean; - link?: string; -} - -export const AVAILABLE_MODELS: FullEmbeddingModelDescriptor[] = [ - { - model_name: "intfloat/e5-base-v2", - model_dim: 768, - normalize: true, - description: - "The recommended default for most situations. If you aren't sure which model to use, this is probably the one.", - isDefault: true, - link: "https://huggingface.co/intfloat/e5-base-v2", - query_prefix: "query: ", - passage_prefix: "passage: ", - }, - { - model_name: "intfloat/e5-small-v2", - model_dim: 384, - normalize: true, - description: - "A smaller / faster version of the default model. If you're running Danswer on a resource constrained system, then this is a good choice.", - link: "https://huggingface.co/intfloat/e5-small-v2", - query_prefix: "query: ", - passage_prefix: "passage: ", - }, - { - model_name: "intfloat/multilingual-e5-base", - model_dim: 768, - normalize: true, - description: - "If you have many documents in other languages besides English, this is the one to go for.", - link: "https://huggingface.co/intfloat/multilingual-e5-base", - query_prefix: "query: ", - passage_prefix: "passage: ", - }, - { - model_name: "intfloat/multilingual-e5-small", - model_dim: 384, - normalize: true, - description: - "If you have many documents in other languages besides English, and you're running on a resource constrained system, then this is the one to go for.", - link: "https://huggingface.co/intfloat/multilingual-e5-base", - query_prefix: "query: ", - passage_prefix: "passage: ", - }, -]; - -export const INVALID_OLD_MODEL = "thenlper/gte-small"; - -export function checkModelNameIsValid(modelName: string | undefined | null) { - if (!modelName) { - return false; - } - if (modelName === INVALID_OLD_MODEL) { - return false; - } - return true; -} - -export function fillOutEmeddingModelDescriptor( - embeddingModel: EmbeddingModelDescriptor | FullEmbeddingModelDescriptor -): FullEmbeddingModelDescriptor { - return { - ...embeddingModel, - description: "", - }; -} diff --git a/web/src/app/admin/models/embedding/page.tsx b/web/src/app/admin/models/embedding/page.tsx deleted file mode 100644 index ccda9af19ad..00000000000 --- a/web/src/app/admin/models/embedding/page.tsx +++ /dev/null @@ -1,349 +0,0 @@ -"use client"; - -import { ThreeDotsLoader } from "@/components/Loading"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { Button, Card, Text, Title } from "@tremor/react"; -import { FiPackage } from "react-icons/fi"; -import useSWR, { mutate } from "swr"; -import { ModelOption, ModelSelector } from "./ModelSelector"; -import { useState } from "react"; -import { ModelSelectionConfirmaionModal } from "./ModelSelectionConfirmation"; -import { ReindexingProgressTable } from 
"./ReindexingProgressTable"; -import { Modal } from "@/components/Modal"; -import { - AVAILABLE_MODELS, - EmbeddingModelDescriptor, - INVALID_OLD_MODEL, - fillOutEmeddingModelDescriptor, -} from "./embeddingModels"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { Connector, ConnectorIndexingStatus } from "@/lib/types"; -import Link from "next/link"; -import { CustomModelForm } from "./CustomModelForm"; - -function Main() { - const [tentativeNewEmbeddingModel, setTentativeNewEmbeddingModel] = - useState(null); - const [isCancelling, setIsCancelling] = useState(false); - const [showAddConnectorPopup, setShowAddConnectorPopup] = - useState(false); - - const { - data: currentEmeddingModel, - isLoading: isLoadingCurrentModel, - error: currentEmeddingModelError, - } = useSWR( - "/api/secondary-index/get-current-embedding-model", - errorHandlingFetcher, - { refreshInterval: 5000 } // 5 seconds - ); - const { - data: futureEmbeddingModel, - isLoading: isLoadingFutureModel, - error: futureEmeddingModelError, - } = useSWR( - "/api/secondary-index/get-secondary-embedding-model", - errorHandlingFetcher, - { refreshInterval: 5000 } // 5 seconds - ); - const { - data: ongoingReIndexingStatus, - isLoading: isLoadingOngoingReIndexingStatus, - } = useSWR[]>( - "/api/manage/admin/connector/indexing-status?secondary_index=true", - errorHandlingFetcher, - { refreshInterval: 5000 } // 5 seconds - ); - const { data: connectors } = useSWR[]>( - "/api/manage/connector", - errorHandlingFetcher, - { refreshInterval: 5000 } // 5 seconds - ); - - const onSelect = async (model: EmbeddingModelDescriptor) => { - if (currentEmeddingModel?.model_name === INVALID_OLD_MODEL) { - await onConfirm(model); - } else { - setTentativeNewEmbeddingModel(model); - } - }; - - const onConfirm = async (model: EmbeddingModelDescriptor) => { - const response = await fetch( - "/api/secondary-index/set-new-embedding-model", - { - method: "POST", - body: JSON.stringify(model), - headers: { - "Content-Type": "application/json", - }, - } - ); - if (response.ok) { - setTentativeNewEmbeddingModel(null); - mutate("/api/secondary-index/get-secondary-embedding-model"); - if (!connectors || !connectors.length) { - setShowAddConnectorPopup(true); - } - } else { - alert(`Failed to update embedding model - ${await response.text()}`); - } - }; - - const onCancel = async () => { - const response = await fetch("/api/secondary-index/cancel-new-embedding", { - method: "POST", - }); - if (response.ok) { - setTentativeNewEmbeddingModel(null); - mutate("/api/secondary-index/get-secondary-embedding-model"); - } else { - alert( - `Failed to cancel embedding model update - ${await response.text()}` - ); - } - - setIsCancelling(false); - }; - - if (isLoadingCurrentModel || isLoadingFutureModel) { - return ; - } - - if ( - currentEmeddingModelError || - !currentEmeddingModel || - futureEmeddingModelError - ) { - return ; - } - - const currentModelName = currentEmeddingModel.model_name; - const currentModel = - AVAILABLE_MODELS.find((model) => model.model_name === currentModelName) || - fillOutEmeddingModelDescriptor(currentEmeddingModel); - - const newModelSelection = futureEmbeddingModel - ? AVAILABLE_MODELS.find( - (model) => model.model_name === futureEmbeddingModel.model_name - ) || fillOutEmeddingModelDescriptor(futureEmbeddingModel) - : null; - - return ( -
    - {tentativeNewEmbeddingModel && ( - - model.model_name === tentativeNewEmbeddingModel.model_name - ) === undefined - } - onConfirm={() => onConfirm(tentativeNewEmbeddingModel)} - onCancel={() => setTentativeNewEmbeddingModel(null)} - /> - )} - - {showAddConnectorPopup && ( - -
    -
    - Embeding model successfully selected{" "} - 🙌 -
    -
    - To complete the initial setup, let's add a connector! -
    -
    - Connectors are the way that Danswer gets data from your - organization's various data sources. Once setup, we'll - automatically sync data from your apps and docs into Danswer, so - you can search all through all of them in one place. -
    -
    - - - -
    -
    -
    - )} - - {isCancelling && ( - setIsCancelling(false)} - title="Cancel Embedding Model Switch" - > -
    -
    - Are you sure you want to cancel? -
    -
    - Cancelling will revert to the previous model and all progress will - be lost. -
    -
    - -
    -
    -
    - )} - - - Embedding models are used to generate embeddings for your documents, - which then power Danswer's search. - - - {currentModel ? ( - <> - Current Embedding Model - - - - - - ) : ( - newModelSelection && - (!connectors || !connectors.length) && ( - <> - Current Embedding Model - - - - - - ) - )} - - {!showAddConnectorPopup && - (!newModelSelection ? ( -
    - {currentModel ? ( - <> - Switch your Embedding Model - - - If the current model is not working for you, you can update - your model choice below. Note that this will require a - complete re-indexing of all your documents across every - connected source. We will take care of this in the background, - but depending on the size of your corpus, this could take - hours, day, or even weeks. You can monitor the progress of the - re-indexing on this page. - - - ) : ( - <> - Choose your Embedding Model - - )} - - - Below are a curated selection of quality models that we recommend - you choose from. - - - modelOption.model_name !== currentModelName - )} - setSelectedModel={onSelect} - /> - - - Alternatively, (if you know what you're doing) you can - specify a{" "} - - SentenceTransformers - - -compatible model of your choice below. The rough list of - supported models can be found{" "} - - here - - . -
    - NOTE: not all models listed will work with Danswer, since - some have unique interfaces or special requirements. If in doubt, - reach out to the Danswer team. -
    - -
    - - - -
    -
    - ) : ( - connectors && - connectors.length > 0 && ( -
    - Current Upgrade Status -
    -
    - Currently in the process of switching to: -
    - - - - - - The table below shows the re-indexing progress of all existing - connectors. Once all connectors have been re-indexed - successfully, the new model will be used for all search - queries. Until then, we will use the old model so that no - downtime is necessary during this transition. - - - {isLoadingOngoingReIndexingStatus ? ( - - ) : ongoingReIndexingStatus ? ( - - ) : ( - - )} -
    -
    - ) - ))} -
    - ); -} - -function Page() { - return ( -
    - } - /> - -
    -
    - ); -} - -export default Page; diff --git a/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx b/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx deleted file mode 100644 index 88b72f9411d..00000000000 --- a/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx +++ /dev/null @@ -1,427 +0,0 @@ -import { LoadingAnimation } from "@/components/Loading"; -import { Button, Divider, Text } from "@tremor/react"; -import { - ArrayHelpers, - ErrorMessage, - Field, - FieldArray, - Form, - Formik, -} from "formik"; -import { FiPlus, FiTrash, FiX } from "react-icons/fi"; -import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; -import { - Label, - SubLabel, - TextArrayField, - TextFormField, -} from "@/components/admin/connectors/Field"; -import { useState } from "react"; -import { useSWRConfig } from "swr"; -import { FullLLMProvider } from "./interfaces"; -import { PopupSpec } from "@/components/admin/connectors/Popup"; -import * as Yup from "yup"; -import isEqual from "lodash/isEqual"; - -function customConfigProcessing(customConfigsList: [string, string][]) { - const customConfig: { [key: string]: string } = {}; - customConfigsList.forEach(([key, value]) => { - customConfig[key] = value; - }); - return customConfig; -} - -export function CustomLLMProviderUpdateForm({ - onClose, - existingLlmProvider, - shouldMarkAsDefault, - setPopup, -}: { - onClose: () => void; - existingLlmProvider?: FullLLMProvider; - shouldMarkAsDefault?: boolean; - setPopup?: (popup: PopupSpec) => void; -}) { - const { mutate } = useSWRConfig(); - - const [isTesting, setIsTesting] = useState(false); - const [testError, setTestError] = useState(""); - - // Define the initial values based on the provider's requirements - const initialValues = { - name: existingLlmProvider?.name ?? "", - provider: existingLlmProvider?.provider ?? "", - api_key: existingLlmProvider?.api_key ?? "", - api_base: existingLlmProvider?.api_base ?? "", - api_version: existingLlmProvider?.api_version ?? "", - default_model_name: existingLlmProvider?.default_model_name ?? null, - fast_default_model_name: - existingLlmProvider?.fast_default_model_name ?? null, - model_names: existingLlmProvider?.model_names ?? [], - custom_config_list: existingLlmProvider?.custom_config - ? 
Object.entries(existingLlmProvider.custom_config) - : [], - }; - - // Setup validation schema if required - const validationSchema = Yup.object({ - name: Yup.string().required("Display Name is required"), - provider: Yup.string().required("Provider Name is required"), - api_key: Yup.string(), - api_base: Yup.string(), - api_version: Yup.string(), - model_names: Yup.array(Yup.string().required("Model name is required")), - default_model_name: Yup.string().required("Model name is required"), - fast_default_model_name: Yup.string().nullable(), - custom_config_list: Yup.array(), - }); - - return ( - { - setSubmitting(true); - - if (values.model_names.length === 0) { - const fullErrorMsg = "At least one model name is required"; - if (setPopup) { - setPopup({ - type: "error", - message: fullErrorMsg, - }); - } else { - alert(fullErrorMsg); - } - setSubmitting(false); - return; - } - - // test the configuration - if (!isEqual(values, initialValues)) { - setIsTesting(true); - - const response = await fetch("/api/admin/llm/test", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - custom_config: customConfigProcessing(values.custom_config_list), - ...values, - }), - }); - setIsTesting(false); - - if (!response.ok) { - const errorMsg = (await response.json()).detail; - setTestError(errorMsg); - return; - } - } - - const response = await fetch(LLM_PROVIDERS_ADMIN_URL, { - method: "PUT", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - ...values, - custom_config: customConfigProcessing(values.custom_config_list), - }), - }); - - if (!response.ok) { - const errorMsg = (await response.json()).detail; - const fullErrorMsg = existingLlmProvider - ? `Failed to update provider: ${errorMsg}` - : `Failed to enable provider: ${errorMsg}`; - if (setPopup) { - setPopup({ - type: "error", - message: fullErrorMsg, - }); - } else { - alert(fullErrorMsg); - } - return; - } - - if (shouldMarkAsDefault) { - const newLlmProvider = (await response.json()) as FullLLMProvider; - const setDefaultResponse = await fetch( - `${LLM_PROVIDERS_ADMIN_URL}/${newLlmProvider.id}/default`, - { - method: "POST", - } - ); - if (!setDefaultResponse.ok) { - const errorMsg = (await setDefaultResponse.json()).detail; - const fullErrorMsg = `Failed to set provider as default: ${errorMsg}`; - if (setPopup) { - setPopup({ - type: "error", - message: fullErrorMsg, - }); - } else { - alert(fullErrorMsg); - } - return; - } - } - - mutate(LLM_PROVIDERS_ADMIN_URL); - onClose(); - - const successMsg = existingLlmProvider - ? "Provider updated successfully!" - : "Provider enabled successfully!"; - if (setPopup) { - setPopup({ - type: "success", - message: successMsg, - }); - } else { - alert(successMsg); - } - - setSubmitting(false); - }} - > - {({ values }) => ( -
    - - - - - - Should be one of the providers listed at{" "} - - https://docs.litellm.ai/docs/providers - - . - - } - placeholder="Name of the custom provider" - /> - - - - - Fill in the following as is needed. Refer to the LiteLLM - documentation for the model provider name specified above in order - to determine which fields are required. - - - - - - - - - - - <> -
    - Additional configurations needed by the model provider. Are - passed to litellm via environment variables. -
    - -
    - For example, when configuring the Cloudflare provider, you would - need to set `CLOUDFLARE_ACCOUNT_ID` as the key and your - Cloudflare account ID as the value. -
    - -
    - - ) => ( -
    - {values.custom_config_list.map((_, index) => { - return ( -
    -
    -
    -
    - - - -
    - -
    - - - -
    -
    -
    - arrayHelpers.remove(index)} - /> -
    -
    -
    - ); - })} - - -
    - )} - /> - - - - - - - - - - - - - -
    - {/* NOTE: this is above the test button to make sure it's visible */} - {testError && {testError}} - -
    - - {existingLlmProvider && ( - - )} -
    -
    - - )} -
    - ); -} diff --git a/web/src/app/admin/models/llm/constants.ts b/web/src/app/admin/models/llm/constants.ts deleted file mode 100644 index 2db434ee9a3..00000000000 --- a/web/src/app/admin/models/llm/constants.ts +++ /dev/null @@ -1 +0,0 @@ -export const LLM_PROVIDERS_ADMIN_URL = "/api/admin/llm/provider"; diff --git a/web/src/app/admin/prompt-library/hooks.ts b/web/src/app/admin/prompt-library/hooks.ts new file mode 100644 index 00000000000..ccab6b34079 --- /dev/null +++ b/web/src/app/admin/prompt-library/hooks.ts @@ -0,0 +1,46 @@ +import useSWR from "swr"; +import { InputPrompt } from "./interfaces"; + +const fetcher = (url: string) => fetch(url).then((res) => res.json()); + +export const useAdminInputPrompts = () => { + const { data, error, mutate } = useSWR( + `/api/admin/input_prompt`, + fetcher + ); + + return { + data, + error, + isLoading: !error && !data, + refreshInputPrompts: mutate, + }; +}; + +export const useInputPrompts = (includePublic: boolean = false) => { + const { data, error, mutate } = useSWR( + `/api/input_prompt${includePublic ? "?include_public=true" : ""}`, + fetcher + ); + + return { + data, + error, + isLoading: !error && !data, + refreshInputPrompts: mutate, + }; +}; + +export const useInputPrompt = (id: number) => { + const { data, error, mutate } = useSWR( + `/api/input_prompt/${id}`, + fetcher + ); + + return { + data, + error, + isLoading: !error && !data, + refreshInputPrompt: mutate, + }; +}; diff --git a/web/src/app/admin/prompt-library/interfaces.ts b/web/src/app/admin/prompt-library/interfaces.ts new file mode 100644 index 00000000000..9143a0ea870 --- /dev/null +++ b/web/src/app/admin/prompt-library/interfaces.ts @@ -0,0 +1,31 @@ +export interface InputPrompt { + id: number; + prompt: string; + content: string; + active: boolean; + is_public: string; +} + +export interface EditPromptModalProps { + onClose: () => void; + + promptId: number; + editInputPrompt: ( + promptId: number, + values: CreateInputPromptRequest + ) => Promise; +} +export interface CreateInputPromptRequest { + prompt: string; + content: string; +} + +export interface AddPromptModalProps { + onClose: () => void; + onSubmit: (promptData: CreateInputPromptRequest) => void; +} +export interface PromptData { + id: number; + prompt: string; + content: string; +} diff --git a/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx b/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx new file mode 100644 index 00000000000..8f826098bc4 --- /dev/null +++ b/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx @@ -0,0 +1,84 @@ +import React from "react"; +import { Formik, Form, Field, ErrorMessage } from "formik"; +import * as Yup from "yup"; +import { ModalWrapper } from "@/components/modals/ModalWrapper"; +import { Button, Textarea, TextInput } from "@tremor/react"; + +import { BookstackIcon } from "@/components/icons/icons"; +import { AddPromptModalProps } from "../interfaces"; +import { TextFormField } from "@/components/admin/connectors/Field"; + +const AddPromptSchema = Yup.object().shape({ + title: Yup.string().required("Title is required"), + prompt: Yup.string().required("Prompt is required"), +}); + +const AddPromptModal = ({ onClose, onSubmit }: AddPromptModalProps) => { + const defaultPrompts = [ + { + title: "Email help", + prompt: "Write a professional email addressing the following points:", + }, + { + title: "Code explanation", + prompt: "Explain the following code snippet in simple terms:", + }, + { + title: "Product description", + prompt: "Write a compelling 
product description for the following item:", + }, + { + title: "Troubleshooting steps", + prompt: + "Provide step-by-step troubleshooting instructions for the following issue:", + }, + ]; + + return ( + + { + onSubmit({ + prompt: values.title, + content: values.prompt, + }); + setSubmitting(false); + onClose(); + }} + > + {({ isSubmitting, setFieldValue }) => ( +
    +

    + + Add prompt +

    + + + + + + + + )} +
    +
    + ); +}; + +export default AddPromptModal; diff --git a/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx b/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx new file mode 100644 index 00000000000..a5b44da512d --- /dev/null +++ b/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx @@ -0,0 +1,138 @@ +import React from "react"; +import { Formik, Form, Field, ErrorMessage } from "formik"; +import * as Yup from "yup"; +import { ModalWrapper } from "@/components/modals/ModalWrapper"; +import { Button, Textarea, TextInput } from "@tremor/react"; +import { useInputPrompt } from "../hooks"; +import { EditPromptModalProps } from "../interfaces"; + +const EditPromptSchema = Yup.object().shape({ + prompt: Yup.string().required("Title is required"), + content: Yup.string().required("Content is required"), + active: Yup.boolean(), +}); + +const EditPromptModal = ({ + onClose, + promptId, + editInputPrompt, +}: EditPromptModalProps) => { + const { + data: promptData, + error, + refreshInputPrompt, + } = useInputPrompt(promptId); + + if (error) + return ( + +

    Failed to load prompt data

    +
    + ); + + if (!promptData) + return ( + +

    Loading...

    +
    + ); + + return ( + + { + editInputPrompt(promptId, values); + refreshInputPrompt(); + }} + > + {({ isSubmitting, values }) => ( +
    +

    + + + + Edit prompt +

    + +
    +
    + + + +
    + +
    + + + +
    + +
    + +
    +
    + +
    + +
    +
    + )} +
    +
    + ); +}; + +export default EditPromptModal; diff --git a/web/src/app/admin/prompt-library/page.tsx b/web/src/app/admin/prompt-library/page.tsx new file mode 100644 index 00000000000..d7c72ff5fc3 --- /dev/null +++ b/web/src/app/admin/prompt-library/page.tsx @@ -0,0 +1,32 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { ClosedBookIcon } from "@/components/icons/icons"; +import { useAdminInputPrompts } from "./hooks"; +import { PromptSection } from "./promptSection"; + +const Page = () => { + const { + data: promptLibrary, + error: promptLibraryError, + isLoading: promptLibraryIsLoading, + refreshInputPrompts: refreshPrompts, + } = useAdminInputPrompts(); + + return ( +
    + } + title="Prompt Library" + /> + +
    + ); +}; +export default Page; diff --git a/web/src/app/admin/prompt-library/promptLibrary.tsx b/web/src/app/admin/prompt-library/promptLibrary.tsx new file mode 100644 index 00000000000..0b47bf40ad1 --- /dev/null +++ b/web/src/app/admin/prompt-library/promptLibrary.tsx @@ -0,0 +1,260 @@ +"use client"; + +import { EditIcon, TrashIcon } from "@/components/icons/icons"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { MagnifyingGlass } from "@phosphor-icons/react"; +import { useState } from "react"; +import { + Table, + TableHead, + TableRow, + TableHeaderCell, + TableBody, + TableCell, +} from "@tremor/react"; +import { FilterDropdown } from "@/components/search/filtering/FilterDropdown"; +import { FiTag } from "react-icons/fi"; +import { PageSelector } from "@/components/PageSelector"; +import { InputPrompt } from "./interfaces"; +import { Modal } from "@/components/Modal"; + +const CategoryBubble = ({ + name, + onDelete, +}: { + name: string; + onDelete?: () => void; +}) => ( + + {name} + {onDelete && ( + + )} + +); + +const NUM_RESULTS_PER_PAGE = 10; + +export const PromptLibraryTable = ({ + promptLibrary, + refresh, + setPopup, + handleEdit, + isPublic, +}: { + promptLibrary: InputPrompt[]; + refresh: () => void; + setPopup: (popup: PopupSpec | null) => void; + handleEdit: (promptId: number) => void; + isPublic: boolean; +}) => { + const [query, setQuery] = useState(""); + const [currentPage, setCurrentPage] = useState(1); + const [selectedStatus, setSelectedStatus] = useState([]); + + const columns = [ + { name: "Prompt", key: "prompt" }, + { name: "Content", key: "content" }, + { name: "Status", key: "status" }, + { name: "", key: "edit" }, + { name: "", key: "delete" }, + ]; + + const filteredPromptLibrary = promptLibrary.filter((item) => { + const cleanedQuery = query.toLowerCase(); + const searchMatch = + item.prompt.toLowerCase().includes(cleanedQuery) || + item.content.toLowerCase().includes(cleanedQuery); + const statusMatch = + selectedStatus.length === 0 || + (selectedStatus.includes("Active") && item.active) || + (selectedStatus.includes("Inactive") && !item.active); + + return searchMatch && statusMatch; + }); + + const totalPages = Math.ceil( + filteredPromptLibrary.length / NUM_RESULTS_PER_PAGE + ); + const startIndex = (currentPage - 1) * NUM_RESULTS_PER_PAGE; + const endIndex = startIndex + NUM_RESULTS_PER_PAGE; + const paginatedPromptLibrary = filteredPromptLibrary.slice( + startIndex, + endIndex + ); + + const handlePageChange = (page: number) => { + setCurrentPage(page); + }; + + const handleDelete = async (id: number) => { + const response = await fetch( + `/api${isPublic ? "/admin" : ""}/input_prompt/${id}`, + { + method: "DELETE", + } + ); + if (!response.ok) { + setPopup({ message: "Failed to delete input prompt", type: "error" }); + } + refresh(); + }; + + const handleStatusSelect = (status: string) => { + setSelectedStatus((prev) => { + if (prev.includes(status)) { + return prev.filter((s) => s !== status); + } + return [...prev, status]; + }); + }; + + const [confirmDeletionId, setConfirmDeletionId] = useState( + null + ); + + return ( +
    + {confirmDeletionId != null && ( + setConfirmDeletionId(null)} + className="max-w-sm" + > + <> +

    + Are you sure you want to delete this prompt? You will not be able + to recover this prompt +

    +
    + + +
    + +
    + )} + +
    + + { + setQuery(event.target.value); + setCurrentPage(1); + }} + /> +
    +
    + handleStatusSelect(option.key)} + icon={} + defaultDisplay="All Statuses" + /> +
    + {selectedStatus.map((status) => ( + handleStatusSelect(status)} + /> + ))} +
    +
    +
    + + + + {columns.map((column) => ( + + {column.name} + + ))} + + + + {paginatedPromptLibrary.length > 0 ? ( + paginatedPromptLibrary + .filter((prompt) => !(!isPublic && prompt.is_public)) + .map((item) => ( + + {item.prompt} + {item.content} + {item.active ? "Active" : "Inactive"} + + + + + + + + )) + ) : ( + + No matching prompts found... + + )} + +
    + {paginatedPromptLibrary.length > 0 && ( +
    + +
    + )} +
    +
    + ); +}; diff --git a/web/src/app/admin/prompt-library/promptSection.tsx b/web/src/app/admin/prompt-library/promptSection.tsx new file mode 100644 index 00000000000..f719ad500bd --- /dev/null +++ b/web/src/app/admin/prompt-library/promptSection.tsx @@ -0,0 +1,146 @@ +"use client"; + +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { Button, Divider, Text } from "@tremor/react"; +import { useState } from "react"; +import AddPromptModal from "./modals/AddPromptModal"; +import EditPromptModal from "./modals/EditPromptModal"; +import { PromptLibraryTable } from "./promptLibrary"; +import { CreateInputPromptRequest, InputPrompt } from "./interfaces"; + +export const PromptSection = ({ + promptLibrary, + isLoading, + error, + refreshPrompts, + centering = false, + isPublic, +}: { + promptLibrary: InputPrompt[]; + isLoading: boolean; + error: any; + refreshPrompts: () => void; + centering?: boolean; + isPublic: boolean; +}) => { + const { popup, setPopup } = usePopup(); + const [newPrompt, setNewPrompt] = useState(false); + const [newPromptId, setNewPromptId] = useState(null); + + const createInputPrompt = async ( + promptData: CreateInputPromptRequest + ): Promise => { + const response = await fetch("/api/input_prompt", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ ...promptData, is_public: isPublic }), + }); + + if (!response.ok) { + setPopup({ message: "Failed to create input prompt", type: "error" }); + } + + refreshPrompts(); + return response.json(); + }; + + const editInputPrompt = async ( + promptId: number, + values: CreateInputPromptRequest + ) => { + try { + const response = await fetch(`/api/input_prompt/${promptId}`, { + method: "PATCH", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(values), + }); + + if (!response.ok) { + setPopup({ message: "Failed to update prompt!", type: "error" }); + } + + setNewPromptId(null); + refreshPrompts(); + } catch (err) { + setPopup({ message: `Failed to update prompt: ${err}`, type: "error" }); + } + }; + + if (isLoading) { + return ; + } + + if (error || !promptLibrary) { + return ( + + ); + } + + const handleEdit = (promptId: number) => { + setNewPromptId(promptId); + }; + + return ( +
    + {popup} + + {newPrompt && ( + setNewPrompt(false)} + /> + )} + + {newPromptId && ( + setNewPromptId(null)} + /> + )} +
    + + Create prompts that can be accessed with the `/` shortcut in + Danswer Chat.{" "} + {isPublic + ? "Prompts created here will be accessible to all users." + : "Prompts created here will be available only to you."} + +
    + +
    + + + + + +
    + +
    +
    + ); +}; diff --git a/web/src/app/admin/settings/SettingsForm.tsx b/web/src/app/admin/settings/SettingsForm.tsx index 858a5c0ff8b..03a0171363e 100644 --- a/web/src/app/admin/settings/SettingsForm.tsx +++ b/web/src/app/admin/settings/SettingsForm.tsx @@ -103,84 +103,117 @@ function IntegerInput({ export function SettingsForm() { const router = useRouter(); - const combinedSettings = useContext(SettingsContext); + const [settings, setSettings] = useState(null); const [chatRetention, setChatRetention] = useState(""); const { popup, setPopup } = usePopup(); const isEnterpriseEnabled = usePaidEnterpriseFeaturesEnabled(); + const combinedSettings = useContext(SettingsContext); + useEffect(() => { - if (combinedSettings?.settings.maximum_chat_retention_days !== undefined) { + if (combinedSettings) { + setSettings(combinedSettings.settings); setChatRetention( combinedSettings.settings.maximum_chat_retention_days?.toString() || "" ); } - }, [combinedSettings?.settings.maximum_chat_retention_days]); + }, []); - if (!combinedSettings) { + if (!settings) { return null; } - const settings = combinedSettings.settings; async function updateSettingField( updateRequests: { fieldName: keyof Settings; newValue: any }[] ) { - const newValues: any = {}; - updateRequests.forEach(({ fieldName, newValue }) => { - newValues[fieldName] = newValue; - }); + // Optimistically update the local state + const newSettings: Settings | null = settings + ? { + ...settings, + ...updateRequests.reduce((acc, { fieldName, newValue }) => { + acc[fieldName] = newValue ?? settings[fieldName]; + return acc; + }, {} as Partial), + } + : null; + setSettings(newSettings); + + try { + const response = await fetch("/api/admin/settings", { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(newSettings), + }); + + if (!response.ok) { + const errorMsg = (await response.json()).detail; + throw new Error(errorMsg); + } - const response = await fetch("/api/admin/settings", { - method: "PUT", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - ...settings, - ...newValues, - }), - }); - if (response.ok) { router.refresh(); - } else { - const errorMsg = (await response.json()).detail; - alert(`Failed to update settings. ${errorMsg}`); + setPopup({ + message: "Settings updated successfully!", + type: "success", + }); + } catch (error) { + // Revert the optimistic update + setSettings(settings); + console.error("Error updating settings:", error); + setPopup({ + message: `Failed to update settings`, + type: "error", + }); + } + } + + function handleToggleSettingsField( + fieldName: keyof Settings, + checked: boolean + ) { + const updates: { fieldName: keyof Settings; newValue: any }[] = [ + { fieldName, newValue: checked }, + ]; + + // If we're disabling a page, check if we need to update the default page + if ( + !checked && + (fieldName === "search_page_enabled" || fieldName === "chat_page_enabled") + ) { + const otherPageField = + fieldName === "search_page_enabled" + ? "chat_page_enabled" + : "search_page_enabled"; + const otherPageEnabled = settings && settings[otherPageField]; + + if ( + otherPageEnabled && + settings?.default_page === + (fieldName === "search_page_enabled" ? "search" : "chat") + ) { + updates.push({ + fieldName: "default_page", + newValue: fieldName === "search_page_enabled" ? 
"chat" : "search", + }); + } } + + updateSettingField(updates); } function handleSetChatRetention() { - // Convert chatRetention to a number or null and update the global settings - const newValue = - chatRetention === "" ? null : parseInt(chatRetention.toString(), 10); + const newValue = chatRetention === "" ? null : parseInt(chatRetention, 10); updateSettingField([ - { fieldName: "maximum_chat_retention_days", newValue: newValue }, - ]) - .then(() => { - setPopup({ - message: "Chat retention settings updated successfully!", - type: "success", - }); - }) - .catch((error) => { - console.error("Error updating settings:", error); - const errorMessage = - error.response?.data?.message || error.message || "Unknown error"; - setPopup({ - message: `Failed to update settings: ${errorMessage}`, - type: "error", - }); - }); + { fieldName: "maximum_chat_retention_days", newValue }, + ]); } function handleClearChatRetention() { - setChatRetention(""); // Clear the chat retention input + setChatRetention(""); updateSettingField([ { fieldName: "maximum_chat_retention_days", newValue: null }, - ]).then(() => { - setPopup({ - message: "Chat retention cleared successfully!", - type: "success", - }); - }); + ]); } return ( @@ -190,36 +223,20 @@ export function SettingsForm() { { - const updates: any[] = [ - { fieldName: "search_page_enabled", newValue: e.target.checked }, - ]; - if (!e.target.checked && settings.default_page === "search") { - updates.push({ fieldName: "default_page", newValue: "chat" }); - } - updateSettingField(updates); - }} + onChange={(e) => + handleToggleSettingsField("search_page_enabled", e.target.checked) + } /> { - const updates: any[] = [ - { fieldName: "chat_page_enabled", newValue: e.target.checked }, - ]; - if (!e.target.checked && settings.default_page === "chat") { - updates.push({ fieldName: "default_page", newValue: "search" }); - } - updateSettingField(updates); - }} + onChange={(e) => + handleToggleSettingsField("chat_page_enabled", e.target.checked) + } /> + {isEnterpriseEnabled && ( <> Chat Settings @@ -246,10 +264,8 @@ export function SettingsForm() { value={chatRetention === "" ? 
null : Number(chatRetention)} onChange={(e) => { const numValue = parseInt(e.target.value, 10); - if (numValue >= 1) { - setChatRetention(numValue.toString()); - } else if (e.target.value === "") { - setChatRetention(""); + if (numValue >= 1 || e.target.value === "") { + setChatRetention(e.target.value); } }} id="chatRetentionInput" diff --git a/web/src/app/admin/settings/interfaces.ts b/web/src/app/admin/settings/interfaces.ts index 02372ce2c74..247bfd09d83 100644 --- a/web/src/app/admin/settings/interfaces.ts +++ b/web/src/app/admin/settings/interfaces.ts @@ -3,13 +3,25 @@ export interface Settings { search_page_enabled: boolean; default_page: "search" | "chat"; maximum_chat_retention_days: number | null; + notifications: Notification[]; + needs_reindexing: boolean; +} + +export interface Notification { + id: number; + notif_type: string; + dismissed: boolean; + last_shown: string; + first_shown: string; } export interface EnterpriseSettings { application_name: string | null; use_custom_logo: boolean; + use_custom_logotype: boolean; // custom Chat components + custom_lower_disclaimer_content: string | null; custom_header_content: string | null; custom_popup_header: string | null; custom_popup_content: string | null; @@ -19,4 +31,6 @@ export interface CombinedSettings { settings: Settings; enterpriseSettings: EnterpriseSettings | null; customAnalyticsScript: string | null; + isMobile?: boolean; + webVersion: string | null; } diff --git a/web/src/app/admin/settings/page.tsx b/web/src/app/admin/settings/page.tsx index eff4262f0d8..9cb2f630e29 100644 --- a/web/src/app/admin/settings/page.tsx +++ b/web/src/app/admin/settings/page.tsx @@ -1,14 +1,15 @@ import { AdminPageTitle } from "@/components/admin/Title"; -import { FiSettings } from "react-icons/fi"; + import { SettingsForm } from "./SettingsForm"; import { Text } from "@tremor/react"; +import { SettingsIcon } from "@/components/icons/icons"; export default async function Page() { return (
    } + icon={} /> diff --git a/web/src/app/admin/standard-answer/StandardAnswerCreationForm.tsx b/web/src/app/admin/standard-answer/StandardAnswerCreationForm.tsx index 682c43a9a95..9e4ea1cb9e4 100644 --- a/web/src/app/admin/standard-answer/StandardAnswerCreationForm.tsx +++ b/web/src/app/admin/standard-answer/StandardAnswerCreationForm.tsx @@ -93,11 +93,13 @@ export const StandardAnswerCreationForm = ({ placeholder="e.g. Wifi Password" autoCompleteDisabled={true} /> - +
    + +
    )} {paginatedStandardAnswers.length > 0 && ( -
    - -
    + <> +
    + + Ensure that you have added the category to the relevant{" "} + + Slack bot + + . + +
    +
    + +
    + )}
    @@ -356,8 +367,10 @@ const Main = () => { {popup} - Here you can manage the standard answers that are used to answer - questions based on keywords or phrases. + Manage the standard answers for pre-defined questions. +
    + Note: Currently, only questions asked from Slack can receive standard + answers.
    {standardAnswers.length == 0 && ( Add your first standard answer below! diff --git a/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx b/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx index c80b78acbbe..085114a836b 100644 --- a/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx +++ b/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx @@ -156,17 +156,14 @@ export const CreateRateLimitModal = ({ type="number" placeholder="" /> -
    - -
    + )} diff --git a/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx b/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx index 71e550a1c4f..ec5fdc60028 100644 --- a/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx +++ b/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx @@ -24,6 +24,7 @@ type TokenRateLimitTableArgs = { description?: string; fetchUrl: string; hideHeading?: boolean; + isAdmin: boolean; }; export const TokenRateLimitTable = ({ @@ -32,6 +33,7 @@ export const TokenRateLimitTable = ({ description, fetchUrl, hideHeading, + isAdmin, }: TokenRateLimitTableArgs) => { const shouldRenderGroupName = () => tokenRateLimits.length > 0 && tokenRateLimits[0].group_name !== undefined; @@ -79,7 +81,9 @@ export const TokenRateLimitTable = ({ {!hideHeading && description && ( {description} )} - +
    Enabled @@ -88,7 +92,7 @@ export const TokenRateLimitTable = ({ )} Time Window (Hours) Token Budget (Thousands) - Delete + {isAdmin && Delete} @@ -96,15 +100,33 @@ export const TokenRateLimitTable = ({ return ( -
    handleEnabledChange(tokenRateLimit.token_id)} - className="px-1 py-0.5 hover:bg-hover-light rounded flex cursor-pointer select-none w-24 flex" - > -
    - -

    - {tokenRateLimit.enabled ? "Enabled" : "Disabled"} -

    +
    +
    handleEnabledChange(tokenRateLimit.token_id) + : undefined + } + className={`px-1 py-0.5 rounded select-none w-24 ${ + isAdmin + ? "hover:bg-hover-light cursor-pointer" + : "opacity-50" + }`} + > +
    + + handleEnabledChange(tokenRateLimit.token_id) + : undefined + } + /> +

    + {tokenRateLimit.enabled ? "Enabled" : "Disabled"} +

    +
    @@ -113,13 +135,23 @@ export const TokenRateLimitTable = ({ {tokenRateLimit.group_name} )} - {tokenRateLimit.period_hours} - {tokenRateLimit.token_budget} - handleDelete(tokenRateLimit.token_id)} - /> + {tokenRateLimit.period_hours + + " hour" + + (tokenRateLimit.period_hours > 1 ? "s" : "")} + + {tokenRateLimit.token_budget + " thousand tokens"} + + {isAdmin && ( + +
    + handleDelete(tokenRateLimit.token_id)} + /> +
    +
    + )} ); })} @@ -135,12 +167,14 @@ export const GenericTokenRateLimitTable = ({ description, hideHeading, responseMapper, + isAdmin = true, }: { fetchUrl: string; title?: string; description?: string; hideHeading?: boolean; responseMapper?: (data: any) => TokenRateLimitDisplay[]; + isAdmin?: boolean; }) => { const { data, isLoading, error } = useSWR(fetchUrl, errorHandlingFetcher); @@ -164,6 +198,7 @@ export const GenericTokenRateLimitTable = ({ title={title} description={description} hideHeading={hideHeading} + isAdmin={isAdmin} /> ); }; diff --git a/web/src/app/admin/token-rate-limits/page.tsx b/web/src/app/admin/token-rate-limits/page.tsx index 7aeff20c672..fb4b711a2e0 100644 --- a/web/src/app/admin/token-rate-limits/page.tsx +++ b/web/src/app/admin/token-rate-limits/page.tsx @@ -23,6 +23,7 @@ import { mutate } from "swr"; import { usePopup } from "@/components/admin/connectors/Popup"; import { CreateRateLimitModal } from "./CreateRateLimitModal"; import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; +import { ShieldIcon } from "@/components/icons/icons"; const BASE_URL = "/api/admin/token-rate-limits"; const GLOBAL_TOKEN_FETCH_URL = `${BASE_URL}/global`; @@ -219,8 +220,10 @@ function Main() { export default function Page() { return (
    - } /> - + } + />
    ); diff --git a/web/src/app/admin/tools/edit/[toolId]/page.tsx b/web/src/app/admin/tools/edit/[toolId]/page.tsx index 8dd54be46b3..8ae1e908a2d 100644 --- a/web/src/app/admin/tools/edit/[toolId]/page.tsx +++ b/web/src/app/admin/tools/edit/[toolId]/page.tsx @@ -6,6 +6,7 @@ import { DeleteToolButton } from "./DeleteToolButton"; import { FiTool } from "react-icons/fi"; import { AdminPageTitle } from "@/components/admin/Title"; import { BackButton } from "@/components/BackButton"; +import { ToolIcon } from "@/components/icons/icons"; export default async function Page({ params }: { params: { toolId: string } }) { const tool = await fetchToolByIdSS(params.toolId); @@ -46,7 +47,7 @@ export default async function Page({ params }: { params: { toolId: string } }) { } + icon={} /> {body} diff --git a/web/src/app/admin/tools/new/page.tsx b/web/src/app/admin/tools/new/page.tsx index 5d1723f96ac..efff155be58 100644 --- a/web/src/app/admin/tools/new/page.tsx +++ b/web/src/app/admin/tools/new/page.tsx @@ -3,6 +3,7 @@ import { ToolEditor } from "@/app/admin/tools/ToolEditor"; import { BackButton } from "@/components/BackButton"; import { AdminPageTitle } from "@/components/admin/Title"; +import { ToolIcon } from "@/components/icons/icons"; import { Card } from "@tremor/react"; import { FiTool } from "react-icons/fi"; @@ -13,7 +14,7 @@ export default function NewToolPage() { } + icon={} /> diff --git a/web/src/app/admin/tools/page.tsx b/web/src/app/admin/tools/page.tsx index 7b9edf7abe0..543f89ac367 100644 --- a/web/src/app/admin/tools/page.tsx +++ b/web/src/app/admin/tools/page.tsx @@ -6,6 +6,7 @@ import { Divider, Text, Title } from "@tremor/react"; import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; import { AdminPageTitle } from "@/components/admin/Title"; +import { ToolIcon } from "@/components/icons/icons"; export default async function Page() { const toolResponse = await fetchSS("/tool"); @@ -24,7 +25,7 @@ export default async function Page() { return (
    } + icon={} title="Tools" /> diff --git a/web/src/app/admin/users/page.tsx b/web/src/app/admin/users/page.tsx index a61f176e24c..0c258cd857c 100644 --- a/web/src/app/admin/users/page.tsx +++ b/web/src/app/admin/users/page.tsx @@ -194,11 +194,14 @@ const AddUserButton = ({ Invite Users
    + {modal && ( setModal(false)}>
Add the email addresses to import, separated by whitespaces. + Invited users will be able to log in to this domain with their + email address.
    diff --git a/web/src/app/assistants/AssistantSharedStatus.tsx b/web/src/app/assistants/AssistantSharedStatus.tsx index c5127c87e91..b10c23096e8 100644 --- a/web/src/app/assistants/AssistantSharedStatus.tsx +++ b/web/src/app/assistants/AssistantSharedStatus.tsx @@ -1,7 +1,14 @@ import { User } from "@/lib/types"; import { Persona } from "../admin/assistants/interfaces"; import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership"; -import { FiLock, FiUnlock } from "react-icons/fi"; +import { + FiImage, + FiLock, + FiMoreHorizontal, + FiSearch, + FiUnlock, +} from "react-icons/fi"; +import { CustomTooltip } from "@/components/tooltip/CustomTooltip"; export function AssistantSharedStatusDisplay({ assistant, @@ -49,6 +56,20 @@ export function AssistantSharedStatusDisplay({ )}
    )} +
    + Powers:{" "} + {assistant.tools.length == 0 ? ( +

    None

    + ) : ( + assistant.tools.map((tool, ind) => { + if (tool.name === "SearchTool") { + return ; + } else if (tool.name === "ImageGenerationTool") { + return ; + } + }) + )} +
    ); } diff --git a/web/src/app/assistants/SidebarWrapper.tsx b/web/src/app/assistants/SidebarWrapper.tsx new file mode 100644 index 00000000000..2feae589240 --- /dev/null +++ b/web/src/app/assistants/SidebarWrapper.tsx @@ -0,0 +1,166 @@ +"use client"; + +import { HistorySidebar } from "@/app/chat/sessionSidebar/HistorySidebar"; +import { ChatSession } from "@/app/chat/interfaces"; +import { Folder } from "@/app/chat/folders/interfaces"; +import { User } from "@/lib/types"; +import Cookies from "js-cookie"; +import { SIDEBAR_TOGGLED_COOKIE_NAME } from "@/components/resizable/constants"; +import { ReactNode, useContext, useEffect, useRef, useState } from "react"; +import { useSidebarVisibility } from "@/components/chat_search/hooks"; +import FunctionalHeader from "@/components/chat_search/Header"; +import { useRouter } from "next/navigation"; +import { pageType } from "../chat/sessionSidebar/types"; +import FixedLogo from "../chat/shared_chat_search/FixedLogo"; +import { SettingsContext } from "@/components/settings/SettingsProvider"; + +interface SidebarWrapperProps { + chatSessions?: ChatSession[]; + folders?: Folder[]; + initiallyToggled: boolean; + openedFolders?: { [key: number]: boolean }; + content: (props: T) => ReactNode; + headerProps: { + page: pageType; + user: User | null; + }; + contentProps: T; + page: pageType; + size?: "sm" | "lg"; +} + +export default function SidebarWrapper({ + chatSessions, + initiallyToggled, + folders, + openedFolders, + page, + headerProps, + contentProps, + content, + size = "sm", +}: SidebarWrapperProps) { + const [toggledSidebar, setToggledSidebar] = useState(initiallyToggled); + const [showDocSidebar, setShowDocSidebar] = useState(false); // State to track if sidebar is open + // Used to maintain a "time out" for history sidebar so our existing refs can have time to process change + const [untoggled, setUntoggled] = useState(false); + + const explicitlyUntoggle = () => { + setShowDocSidebar(false); + + setUntoggled(true); + setTimeout(() => { + setUntoggled(false); + }, 200); + }; + + const toggleSidebar = () => { + Cookies.set( + SIDEBAR_TOGGLED_COOKIE_NAME, + String(!toggledSidebar).toLocaleLowerCase() + ), + { + path: "/", + }; + setToggledSidebar((toggledSidebar) => !toggledSidebar); + }; + + const sidebarElementRef = useRef(null); + + const settings = useContext(SettingsContext); + useSidebarVisibility({ + toggledSidebar, + sidebarElementRef, + showDocSidebar, + setShowDocSidebar, + mobile: settings?.isMobile, + }); + + const innerSidebarElementRef = useRef(null); + const router = useRouter(); + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.metaKey || event.ctrlKey) { + switch (event.key.toLowerCase()) { + case "e": + event.preventDefault(); + toggleSidebar(); + break; + } + } + }; + + window.addEventListener("keydown", handleKeyDown); + return () => { + window.removeEventListener("keydown", handleKeyDown); + }; + }, [router]); + + return ( +
    +
    +
    + +
    +
    + +
    + +
    +
    + +
    + {content(contentProps)} +
    +
    +
    + +
    + ); +} diff --git a/web/src/app/assistants/ToolsDisplay.tsx b/web/src/app/assistants/ToolsDisplay.tsx index f0a331a21e5..10c25b640c9 100644 --- a/web/src/app/assistants/ToolsDisplay.tsx +++ b/web/src/app/assistants/ToolsDisplay.tsx @@ -1,10 +1,14 @@ import { Bubble } from "@/components/Bubble"; import { ToolSnapshot } from "@/lib/tools/interfaces"; -import { FiImage, FiSearch, FiGlobe } from "react-icons/fi"; +import { FiImage, FiSearch, FiGlobe, FiMoreHorizontal } from "react-icons/fi"; +import { Persona } from "../admin/assistants/interfaces"; +import { CustomTooltip } from "@/components/tooltip/CustomTooltip"; +import { useState } from "react"; export function ToolsDisplay({ tools }: { tools: ToolSnapshot[] }) { return (
    +

    Tools:

    {tools.map((tool) => { let toolName = tool.name; let toolIcon = null; @@ -32,3 +36,94 @@ export function ToolsDisplay({ tools }: { tools: ToolSnapshot[] }) {
    ); } + +export function AssistantTools({ + assistant, + list, + hovered, +}: { + assistant: Persona; + list?: boolean; + hovered?: boolean; +}) { + return ( +
    + 0 && "py-1"} ${!list ? "font-semibold" : "text-subtle text-sm"}`} + > + Tools: + {" "} + {assistant.tools.length == 0 ? ( +

    None

    + ) : ( +
    + {assistant.tools.map((tool, ind) => { + if (tool.name === "SearchTool") { + return ( +
    +
    + + Search +
    +
    + ); + } else if (tool.name === "ImageGenerationTool") { + return ( +
    +
    + + Image Generation +
    +
    + ); + } else { + return ( +
    +
    {tool.name}
    +
    + ); + } + })} +
    + )} +
    + ); +} diff --git a/web/src/app/assistants/edit/[id]/page.tsx b/web/src/app/assistants/edit/[id]/page.tsx index 2b56bb00e48..efcb2ccedee 100644 --- a/web/src/app/assistants/edit/[id]/page.tsx +++ b/web/src/app/assistants/edit/[id]/page.tsx @@ -29,7 +29,6 @@ export default async function Page({ params }: { params: { id: string } }) { redirectType={SuccessfulPersonaUpdateRedirectType.CHAT} /> - Delete Assistant Click the button below to permanently delete this assistant. @@ -58,7 +57,6 @@ export default async function Page({ params }: { params: { id: string } }) {
    - {body} ); diff --git a/web/src/app/assistants/gallery/AssistantsGallery.tsx b/web/src/app/assistants/gallery/AssistantsGallery.tsx index 4b9eaff718b..d4882aa6081 100644 --- a/web/src/app/assistants/gallery/AssistantsGallery.tsx +++ b/web/src/app/assistants/gallery/AssistantsGallery.tsx @@ -15,13 +15,14 @@ import { } from "@/lib/assistants/updateAssistantPreferences"; import { usePopup } from "@/components/admin/connectors/Popup"; import { useRouter } from "next/navigation"; -import { ToolsDisplay } from "../ToolsDisplay"; +import { AssistantTools, ToolsDisplay } from "../ToolsDisplay"; export function AssistantsGallery({ assistants, user, }: { assistants: Persona[]; + user: User | null; }) { function filterAssistants(assistants: Persona[], query: string): Persona[] { @@ -99,10 +100,10 @@ export function AssistantsGallery({ className=" text-xl font-semibold - mb-2 my-auto ml-2 text-strong + line-clamp-2 " > {assistant.name} @@ -150,9 +151,9 @@ export function AssistantsGallery({ } }} size="xs" - color="red" + color="blue" > - Remove + Deselect ) : ( + } + popover={ +
    +

    Banner Content

    + +
    + } + side="bottom" + align="end" + /> + )} + diff --git a/web/src/app/chat/ChatIntro.tsx b/web/src/app/chat/ChatIntro.tsx index 926656958e2..27353aa340f 100644 --- a/web/src/app/chat/ChatIntro.tsx +++ b/web/src/app/chat/ChatIntro.tsx @@ -35,17 +35,13 @@ export function ChatIntro({ }) { const availableSourceMetadata = getSourceMetadataForSources(availableSources); - const [displaySources, setDisplaySources] = useState(false); - return ( <>
    -
    +
    - - -
    +
    {selectedPersona?.name || "How can I help you today?"}
    {selectedPersona && ( diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx index 6321fa9a3ea..3ee22d4d74f 100644 --- a/web/src/app/chat/ChatPage.tsx +++ b/web/src/app/chat/ChatPage.tsx @@ -5,24 +5,29 @@ import { BackendChatSession, BackendMessage, ChatFileType, + ChatSession, ChatSessionSharedStatus, DocumentsResponse, FileDescriptor, ImageGenerationDisplay, Message, + MessageResponseIDInfo, RetrievalType, StreamingError, ToolCallMetadata, } from "./interfaces"; -import { ChatSidebar } from "./sessionSidebar/ChatSidebar"; + +import Prism from "prismjs"; +import Cookies from "js-cookie"; +import { HistorySidebar } from "./sessionSidebar/HistorySidebar"; import { Persona } from "../admin/assistants/interfaces"; import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; import { buildChatUrl, buildLatestMessageChain, checkAnyAssistantHasSearch, createChatSession, + deleteChatSession, getCitedDocumentsFromMessage, getHumanAndAIMessageFromMessageNumber, getLastSuccessfulMessageId, @@ -38,88 +43,221 @@ import { uploadFilesForChat, useScrollonStream, } from "./lib"; -import { useContext, useEffect, useRef, useState } from "react"; +import { + Dispatch, + SetStateAction, + useContext, + useEffect, + useRef, + useState, +} from "react"; import { usePopup } from "@/components/admin/connectors/Popup"; import { SEARCH_PARAM_NAMES, shouldSubmitOnLoad } from "./searchParams"; import { useDocumentSelection } from "./useDocumentSelection"; -import { useFilters, useLlmOverride } from "@/lib/hooks"; +import { LlmOverride, useFilters, useLlmOverride } from "@/lib/hooks"; import { computeAvailableFilters } from "@/lib/filters"; -import { FeedbackType } from "./types"; +import { ChatState, FeedbackType, RegenerationState } from "./types"; import { DocumentSidebar } from "./documentSidebar/DocumentSidebar"; import { DanswerInitializingLoader } from "@/components/DanswerInitializingLoader"; import { FeedbackModal } from "./modal/FeedbackModal"; import { ShareChatSessionModal } from "./modal/ShareChatSessionModal"; -import { ChatPersonaSelector } from "./ChatPersonaSelector"; -import { FiArrowDown, FiShare2 } from "react-icons/fi"; +import { FiArrowDown } from "react-icons/fi"; import { ChatIntro } from "./ChatIntro"; import { AIMessage, HumanMessage } from "./message/Messages"; -import { ThreeDots } from "react-loader-spinner"; import { StarterMessage } from "./StarterMessage"; import { AnswerPiecePacket, DanswerDocument } from "@/lib/search/interfaces"; import { buildFilters } from "@/lib/search/utils"; import { SettingsContext } from "@/components/settings/SettingsProvider"; import Dropzone from "react-dropzone"; -import { checkLLMSupportsImageInput, getFinalLLM } from "@/lib/llm/utils"; +import { + checkLLMSupportsImageInput, + getFinalLLM, + destructureValue, + getLLMProviderOverrideForPersona, +} from "@/lib/llm/utils"; + import { ChatInputBar } from "./input/ChatInputBar"; -import { ConfigurationModal } from "./modal/configuration/ConfigurationModal"; import { useChatContext } from "@/components/context/ChatContext"; -import { UserDropdown } from "@/components/UserDropdown"; import { v4 as uuidv4 } from "uuid"; import { orderAssistantsForUser } from "@/lib/assistants/orderAssistants"; import { ChatPopup } from "./ChatPopup"; -import { ChatBanner } from "./ChatBanner"; -import { TbLayoutSidebarRightExpand } from "react-icons/tb"; -import { SIDEBAR_WIDTH_CONST } from "@/lib/constants"; -import 
ResizableSection from "@/components/resizable/ResizableSection"; +import FunctionalHeader from "@/components/chat_search/Header"; +import { useSidebarVisibility } from "@/components/chat_search/hooks"; +import { SIDEBAR_TOGGLED_COOKIE_NAME } from "@/components/resizable/constants"; +import FixedLogo from "./shared_chat_search/FixedLogo"; +import { getSecondsUntilExpiration } from "@/lib/time"; +import { SetDefaultModelModal } from "./modal/SetDefaultModelModal"; +import { DeleteEntityModal } from "../../components/modals/DeleteEntityModal"; +import { MinimalMarkdown } from "@/components/chat_search/MinimalMarkdown"; +import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal"; + +import { SEARCH_TOOL_NAME } from "./tools/constants"; +import { useUser } from "@/components/user/UserProvider"; const TEMP_USER_MESSAGE_ID = -1; const TEMP_ASSISTANT_MESSAGE_ID = -2; const SYSTEM_MESSAGE_ID = -3; export function ChatPage({ + toggle, documentSidebarInitialWidth, - defaultSelectedPersonaId, + defaultSelectedAssistantId, + toggledSidebar, }: { + toggle: (toggled?: boolean) => void; documentSidebarInitialWidth?: number; - defaultSelectedPersonaId?: number; + defaultSelectedAssistantId?: number; + toggledSidebar: boolean; }) { - const [configModalActiveTab, setConfigModalActiveTab] = useState< - string | null - >(null); + const router = useRouter(); + const searchParams = useSearchParams(); + let { - user, chatSessions, availableSources, availableDocumentSets, - availablePersonas, + availableAssistants, llmProviders, folders, openedFolders, + userInputPrompts, } = useChatContext(); - const filteredAssistants = orderAssistantsForUser(availablePersonas, user); - - const [selectedAssistant, setSelectedAssistant] = useState( - null - ); - const [alternativeGeneratingAssistant, setAlternativeGeneratingAssistant] = - useState(null); + const { user, refreshUser } = useUser(); - const router = useRouter(); - const searchParams = useSearchParams(); + // chat session const existingChatIdRaw = searchParams.get("chatId"); + const currentPersonaId = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID); + const existingChatSessionId = existingChatIdRaw ? parseInt(existingChatIdRaw) : null; const selectedChatSession = chatSessions.find( (chatSession) => chatSession.id === existingChatSessionId ); + const chatSessionIdRef = useRef(existingChatSessionId); - const llmOverrideManager = useLlmOverride(selectedChatSession); + // Only updates on session load (ie. rename / switching chat session) + // Useful for determining which session has been loaded (i.e. still on `new, empty session` or `previous session`) + const loadedIdSessionRef = useRef(existingChatSessionId); + + // Assistants + const filteredAssistants = orderAssistantsForUser(availableAssistants, user); + + const existingChatSessionAssistantId = selectedChatSession?.persona_id; + const [selectedAssistant, setSelectedAssistant] = useState< + Persona | undefined + >( + // NOTE: look through available assistants here, so that even if the user + // has hidden this assistant it still shows the correct assistant when + // going back to an old chat session + existingChatSessionAssistantId !== undefined + ? availableAssistants.find( + (assistant) => assistant.id === existingChatSessionAssistantId + ) + : defaultSelectedAssistantId !== undefined + ? 
availableAssistants.find( + (assistant) => assistant.id === defaultSelectedAssistantId + ) + : undefined + ); + + // Gather default temperature settings + const search_param_temperature = searchParams.get( + SEARCH_PARAM_NAMES.TEMPERATURE + ); + const defaultTemperature = search_param_temperature + ? parseFloat(search_param_temperature) + : selectedAssistant?.tools.some( + (tool) => + tool.in_code_tool_id === "SearchTool" || + tool.in_code_tool_id === "InternetSearchTool" + ) + ? 0 + : 0.7; + + const setSelectedAssistantFromId = (assistantId: number) => { + // NOTE: also intentionally look through available assistants here, so that + // even if the user has hidden an assistant they can still go back to it + // for old chats + setSelectedAssistant( + availableAssistants.find((assistant) => assistant.id === assistantId) + ); + }; + + const llmOverrideManager = useLlmOverride( + user?.preferences.default_model, + selectedChatSession, + defaultTemperature + ); + + const [alternativeAssistant, setAlternativeAssistant] = + useState(null); + + const liveAssistant = + alternativeAssistant || + selectedAssistant || + filteredAssistants[0] || + availableAssistants[0]; + useEffect(() => { + if (!loadedIdSessionRef.current && !currentPersonaId) { + return; + } + + const personaDefault = getLLMProviderOverrideForPersona( + liveAssistant, + llmProviders + ); + + if (personaDefault) { + llmOverrideManager.setLlmOverride(personaDefault); + } else if (user?.preferences.default_model) { + llmOverrideManager.setLlmOverride( + destructureValue(user?.preferences.default_model) + ); + } + }, [liveAssistant]); + + const stopGenerating = () => { + const currentSession = currentSessionId(); + const controller = abortControllers.get(currentSession); + if (controller) { + controller.abort(); + setAbortControllers((prev) => { + const newControllers = new Map(prev); + newControllers.delete(currentSession); + return newControllers; + }); + } - const existingChatSessionPersonaId = selectedChatSession?.persona_id; + const lastMessage = messageHistory[messageHistory.length - 1]; + if ( + lastMessage && + lastMessage.type === "assistant" && + lastMessage.toolCalls[0] && + lastMessage.toolCalls[0].tool_result === undefined + ) { + const newCompleteMessageMap = new Map( + currentMessageMap(completeMessageDetail) + ); + const updatedMessage = { ...lastMessage, toolCalls: [] }; + newCompleteMessageMap.set(lastMessage.messageId, updatedMessage); + updateCompleteMessageDetail(currentSession, newCompleteMessageMap); + } + + updateChatState("input", currentSession); + }; + // this is for "@"ing assistants + + // this is used to track which assistant is being used to generate the current message + // for example, this would come into play when: + // 1. default assistant is `Danswer` + // 2. we "@"ed the `GPT` assistant and sent a message + // 3. 
while the `GPT` assistant message is generating, we "@" the `Paraphrase` assistant + const [alternativeGeneratingAssistant, setAlternativeGeneratingAssistant] = + useState(null); // used to track whether or not the initial "submit on load" has been performed // this only applies if `?submit-on-load=true` or `?submit-on-load=1` is in the URL @@ -134,11 +272,20 @@ export function ChatPage({ existingChatSessionId !== null ); + const [isReady, setIsReady] = useState(false); + useEffect(() => { + Prism.highlightAll(); + setIsReady(true); + }, []); + // this is triggered every time the user switches which chat // session they are using useEffect(() => { const priorChatSessionId = chatSessionIdRef.current; + const loadedSessionId = loadedIdSessionRef.current; chatSessionIdRef.current = existingChatSessionId; + loadedIdSessionRef.current = existingChatSessionId; + textAreaRef.current?.focus(); // only clear things if we're going from one chat session to another @@ -167,28 +314,17 @@ export function ChatPage({ if (chatSessionIdRef.current !== null) { setHasPerformedInitialScroll(false); } - - if (isStreaming) { - setIsCancelled(true); - } } async function initialSessionFetch() { if (existingChatSessionId === null) { setIsFetchingChatMessages(false); - if (defaultSelectedPersonaId !== undefined) { - setSelectedPersona( - filteredAssistants.find( - (persona) => persona.id === defaultSelectedPersonaId - ) - ); + if (defaultSelectedAssistantId !== undefined) { + setSelectedAssistantFromId(defaultSelectedAssistantId); } else { - setSelectedPersona(undefined); + setSelectedAssistant(undefined); } - setCompleteMessageDetail({ - sessionId: null, - messageMap: new Map(), - }); + updateCompleteMessageDetail(null, new Map()); setChatSessionSharedStatus(ChatSessionSharedStatus.Private); // if we're supposed to submit on initial load, then do that here @@ -201,28 +337,28 @@ export function ChatPage({ } return; } - + clearSelectedDocuments(); setIsFetchingChatMessages(true); const response = await fetch( `/api/chat/get-chat-session/${existingChatSessionId}` ); const chatSession = (await response.json()) as BackendChatSession; - - setSelectedPersona( - filteredAssistants.find( - (persona) => persona.id === chatSession.persona_id - ) - ); + setSelectedAssistantFromId(chatSession.persona_id); const newMessageMap = processRawChatHistory(chatSession.messages); const newMessageHistory = buildLatestMessageChain(newMessageMap); - // if the last message is an error, don't overwrite it - if (messageHistory[messageHistory.length - 1]?.type !== "error") { - setCompleteMessageDetail({ - sessionId: chatSession.chat_session_id, - messageMap: newMessageMap, - }); + + // Update message history except for edge where where + // last message is an error and we're on a new chat. 
+ // This corresponds to a "renaming" of chat, which occurs after first message + // stream + if ( + (messageHistory[messageHistory.length - 1]?.type !== "error" || + loadedSessionId != null) && + !currentChatAnswering() + ) { + updateCompleteMessageDetail(chatSession.chat_session_id, newMessageMap); const latestMessageId = newMessageHistory[newMessageHistory.length - 1]?.messageId; @@ -265,26 +401,36 @@ export function ChatPage({ initialSessionFetch(); }, [existingChatSessionId]); - const [usedSidebarWidth, setUsedSidebarWidth] = useState( - documentSidebarInitialWidth || parseInt(SIDEBAR_WIDTH_CONST) + const [message, setMessage] = useState( + searchParams.get(SEARCH_PARAM_NAMES.USER_MESSAGE) || "" ); - const updateSidebarWidth = (newWidth: number) => { - setUsedSidebarWidth(newWidth); - if (sidebarElementRef.current && innerSidebarElementRef.current) { - sidebarElementRef.current.style.transition = ""; - sidebarElementRef.current.style.width = `${newWidth}px`; - innerSidebarElementRef.current.style.width = `${newWidth}px`; - } + const [completeMessageDetail, setCompleteMessageDetail] = useState< + Map> + >(new Map()); + + const updateCompleteMessageDetail = ( + sessionId: number | null, + messageMap: Map + ) => { + setCompleteMessageDetail((prevState) => { + const newState = new Map(prevState); + newState.set(sessionId, messageMap); + return newState; + }); + }; + + const currentMessageMap = ( + messageDetail: Map> + ) => { + return ( + messageDetail.get(chatSessionIdRef.current) || new Map() + ); + }; + const currentSessionId = (): number => { + return chatSessionIdRef.current!; }; - const [message, setMessage] = useState( - searchParams.get(SEARCH_PARAM_NAMES.USER_MESSAGE) || "" - ); - const [completeMessageDetail, setCompleteMessageDetail] = useState<{ - sessionId: number | null; - messageMap: Map; - }>({ sessionId: null, messageMap: new Map() }); const upsertToCompleteMessageMap = ({ messages, completeMessageMapOverride, @@ -302,8 +448,9 @@ export function ChatPage({ }) => { // deep copy const frozenCompleteMessageMap = - completeMessageMapOverride || completeMessageDetail.messageMap; + completeMessageMapOverride || currentMessageMap(completeMessageDetail); const newCompleteMessageMap = structuredClone(frozenCompleteMessageMap); + if (newCompleteMessageMap.size === 0) { const systemMessageId = messages[0].parentMessageId || SYSTEM_MESSAGE_ID; const firstMessageId = messages[0].messageId; @@ -323,6 +470,7 @@ export function ChatPage({ ); messages[0].parentMessageId = systemMessageId; } + messages.forEach((message) => { const idToReplace = replacementsMap?.get(message.messageId); if (idToReplace) { @@ -338,7 +486,6 @@ export function ChatPage({ } newCompleteMessageMap.set(message.messageId, message); }); - // if specified, make these new message the latest of the current message chain if (makeLatestChildMessage) { const currentMessageChain = buildLatestMessageChain( @@ -351,18 +498,134 @@ export function ChatPage({ )!.latestChildMessageId = messages[0].messageId; } } + const newCompleteMessageDetail = { - sessionId: chatSessionId || completeMessageDetail.sessionId, + sessionId: chatSessionId || currentSessionId(), messageMap: newCompleteMessageMap, }; - setCompleteMessageDetail(newCompleteMessageDetail); + + updateCompleteMessageDetail( + chatSessionId || currentSessionId(), + newCompleteMessageMap + ); return newCompleteMessageDetail; }; const messageHistory = buildLatestMessageChain( - completeMessageDetail.messageMap + currentMessageMap(completeMessageDetail) ); - const 
[isStreaming, setIsStreaming] = useState(false); + + const [submittedMessage, setSubmittedMessage] = useState(""); + + const [chatState, setChatState] = useState>( + new Map([[chatSessionIdRef.current, "input"]]) + ); + + const [scrollHeight, setScrollHeight] = useState>( + new Map([[chatSessionIdRef.current, 0]]) + ); + const currentScrollHeight = () => { + return scrollHeight.get(currentSessionId()); + }; + + const retrieveCurrentScrollHeight = (): number | null => { + return scrollHeight.get(currentSessionId()) || null; + }; + + const [regenerationState, setRegenerationState] = useState< + Map + >(new Map([[null, null]])); + + const [abortControllers, setAbortControllers] = useState< + Map + >(new Map()); + + // Updates "null" session values to new session id for + // regeneration, chat, and abort controller state, messagehistory + const updateStatesWithNewSessionId = (newSessionId: number) => { + const updateState = ( + setState: Dispatch>>, + defaultValue?: any + ) => { + setState((prevState) => { + const newState = new Map(prevState); + const existingState = newState.get(null); + if (existingState !== undefined) { + newState.set(newSessionId, existingState); + newState.delete(null); + } else if (defaultValue !== undefined) { + newState.set(newSessionId, defaultValue); + } + return newState; + }); + }; + + updateState(setRegenerationState); + updateState(setChatState); + updateState(setAbortControllers); + + // Update completeMessageDetail + setCompleteMessageDetail((prevState) => { + const newState = new Map(prevState); + const existingMessages = newState.get(null); + if (existingMessages) { + newState.set(newSessionId, existingMessages); + newState.delete(null); + } + return newState; + }); + + // Update chatSessionIdRef + chatSessionIdRef.current = newSessionId; + }; + + const updateChatState = (newState: ChatState, sessionId?: number | null) => { + setChatState((prevState) => { + const newChatState = new Map(prevState); + newChatState.set( + sessionId !== undefined ? sessionId : currentSessionId(), + newState + ); + return newChatState; + }); + }; + + const currentChatState = (): ChatState => { + return chatState.get(currentSessionId()) || "input"; + }; + + const currentChatAnswering = () => { + return ( + currentChatState() == "toolBuilding" || + currentChatState() == "streaming" || + currentChatState() == "loading" + ); + }; + + const updateRegenerationState = ( + newState: RegenerationState | null, + sessionId?: number | null + ) => { + setRegenerationState((prevState) => { + const newRegenerationState = new Map(prevState); + newRegenerationState.set( + sessionId !== undefined ? sessionId : currentSessionId(), + newState + ); + return newRegenerationState; + }); + }; + + const resetRegenerationState = (sessionId?: number | null) => { + updateRegenerationState(null, sessionId); + }; + + const currentRegenerationState = (): RegenerationState | null => { + return regenerationState.get(currentSessionId()) || null; + }; + + const currentSessionChatState = currentChatState(); + const currentSessionRegenerationState = currentRegenerationState(); // uploaded files const [currentMessageFiles, setCurrentMessageFiles] = useState< @@ -380,32 +643,18 @@ export function ChatPage({ ) : { aiMessage: null }; - const [selectedPersona, setSelectedPersona] = useState( - existingChatSessionPersonaId !== undefined - ? filteredAssistants.find( - (persona) => persona.id === existingChatSessionPersonaId - ) - : defaultSelectedPersonaId !== undefined - ? 
filteredAssistants.find( - (persona) => persona.id === defaultSelectedPersonaId - ) - : undefined - ); - const livePersona = - selectedPersona || filteredAssistants[0] || availablePersonas[0]; - const [chatSessionSharedStatus, setChatSessionSharedStatus] = useState(ChatSessionSharedStatus.Private); useEffect(() => { if (messageHistory.length === 0 && chatSessionIdRef.current === null) { - setSelectedPersona( + setSelectedAssistant( filteredAssistants.find( - (persona) => persona.id === defaultSelectedPersonaId + (persona) => persona.id === defaultSelectedAssistantId ) ); } - }, [defaultSelectedPersonaId]); + }, [defaultSelectedAssistantId]); const [ selectedDocuments, @@ -421,7 +670,7 @@ export function ChatPage({ useEffect(() => { async function fetchMaxTokens() { const response = await fetch( - `/api/chat/max-selected-document-tokens?persona_id=${livePersona.id}` + `/api/chat/max-selected-document-tokens?persona_id=${liveAssistant.id}` ); if (response.ok) { const maxTokens = (await response.json()).max_tokens as number; @@ -430,12 +679,12 @@ export function ChatPage({ } fetchMaxTokens(); - }, [livePersona]); + }, [liveAssistant]); const filterManager = useFilters(); const [finalAvailableSources, finalAvailableDocumentSets] = computeAvailableFilters({ - selectedPersona, + selectedPersona: selectedAssistant, availableSources, availableDocumentSets, }); @@ -447,8 +696,6 @@ export function ChatPage({ const [sharingModalVisible, setSharingModalVisible] = useState(false); - // state for cancelling streaming - const [isCancelled, setIsCancelled] = useState(false); const [aboveHorizon, setAboveHorizon] = useState(false); const scrollableDivRef = useRef(null); @@ -509,28 +756,13 @@ export function ChatPage({ } else { endDivRef.current?.scrollIntoView({ behavior: "smooth" }); } - setHasPerformedInitialScroll(true); }, 50); }; - const isCancelledRef = useRef(isCancelled); // scroll is cancelled - useEffect(() => { - isCancelledRef.current = isCancelled; - }, [isCancelled]); - const distance = 500; // distance that should "engage" the scroll const debounce = 100; // time for debouncing - useScrollonStream({ - isStreaming, - scrollableDivRef, - scrollDist, - endDivRef, - distance, - debounce, - }); - const [hasPerformedInitialScroll, setHasPerformedInitialScroll] = useState( existingChatSessionId === null ); @@ -596,23 +828,28 @@ export function ChatPage({ return this.stack.length === 0; } } + async function updateCurrentMessageFIFO( stack: CurrentMessageFIFO, params: any ) { try { - for await (const packetBunch of sendMessage(params)) { - for (const packet of packetBunch) { - stack.push(packet); + for await (const packet of sendMessage(params)) { + if (params.signal?.aborted) { + throw new Error("AbortError"); } - - if (isCancelledRef.current) { - setIsCancelled(false); - break; + stack.push(packet); + } + } catch (error: unknown) { + if (error instanceof Error) { + if (error.name === "AbortError") { + console.debug("Stream aborted"); + } else { + stack.error = error.message; } + } else { + stack.error = String(error); } - } catch (error) { - stack.error = String(error); } finally { stack.isComplete = true; } @@ -632,17 +869,38 @@ export function ChatPage({ queryOverride, forceSearch, isSeededChat, - alternativeAssistant = null, + alternativeAssistantOverride = null, + modelOverRide, + regenerationRequest, }: { messageIdToResend?: number; messageOverride?: string; queryOverride?: string; forceSearch?: boolean; isSeededChat?: boolean; - alternativeAssistant?: Persona | null; + 
alternativeAssistantOverride?: Persona | null; + modelOverRide?: LlmOverride; + regenerationRequest?: RegenerationRequest | null; } = {}) => { - setAlternativeGeneratingAssistant(alternativeAssistant); + let frozenSessionId = currentSessionId(); + + if (currentChatState() != "input") { + setPopup({ + message: "Please wait for the response to complete", + type: "error", + }); + + return; + } + updateRegenerationState( + regenerationRequest + ? { regenerating: true, finalMessageIndex: messageIdToResend || 0 } + : null + ); + + updateChatState("loading"); + setAlternativeGeneratingAssistant(alternativeAssistantOverride); clientScrollToBottom(); let currChatSessionId: number; let isNewSession = chatSessionIdRef.current === null; @@ -651,19 +909,27 @@ export function ChatPage({ if (isNewSession) { currChatSessionId = await createChatSession( - livePersona?.id || 0, + liveAssistant?.id || 0, searchParamBasedChatSessionName ); } else { currChatSessionId = chatSessionIdRef.current as number; } - chatSessionIdRef.current = currChatSessionId; + frozenSessionId = currChatSessionId; + + updateStatesWithNewSessionId(currChatSessionId); + + const controller = new AbortController(); + + setAbortControllers((prev) => + new Map(prev).set(currChatSessionId, controller) + ); const messageToResend = messageHistory.find( (message) => message.messageId === messageIdToResend ); - const messageMap = completeMessageDetail.messageMap; + const messageMap = currentMessageMap(completeMessageDetail); const messageToResendParent = messageToResend?.parentMessageId !== null && messageToResend?.parentMessageId !== undefined @@ -672,23 +938,28 @@ export function ChatPage({ const messageToResendIndex = messageToResend ? messageHistory.indexOf(messageToResend) : null; + if (!messageToResend && messageIdToResend !== undefined) { setPopup({ message: "Failed to re-send message - please refresh the page and try again.", type: "error", }); + resetRegenerationState(currentSessionId()); + updateChatState("input", frozenSessionId); return; } - let currMessage = messageToResend ? messageToResend.message : message; if (messageOverride) { currMessage = messageOverride; } + + setSubmittedMessage(currMessage); const currMessageHistory = messageToResendIndex !== null ? messageHistory.slice(0, messageToResendIndex) : messageHistory; + let parentMessage = messageToResendParent || (currMessageHistory.length > 0 @@ -696,46 +967,15 @@ export function ChatPage({ : null) || (messageMap.size === 1 ? 
Array.from(messageMap.values())[0] : null); - // if we're resending, set the parent's child to null - // we will use tempMessages until the regenerated message is complete - const messageUpdates: Message[] = [ - { - messageId: TEMP_USER_MESSAGE_ID, - message: currMessage, - type: "user", - files: currentMessageFiles, - toolCalls: [], - parentMessageId: parentMessage?.messageId || null, - }, - ]; - if (parentMessage) { - messageUpdates.push({ - ...parentMessage, - childrenMessageIds: (parentMessage.childrenMessageIds || []).concat([ - TEMP_USER_MESSAGE_ID, - ]), - latestChildMessageId: TEMP_USER_MESSAGE_ID, - }); - } - const { messageMap: frozenMessageMap, sessionId: frozenSessionId } = - upsertToCompleteMessageMap({ - messages: messageUpdates, - chatSessionId: currChatSessionId, - }); - - // on initial message send, we insert a dummy system message - // set this as the parent here if no parent is set - if (!parentMessage && frozenMessageMap.size === 2) { - parentMessage = frozenMessageMap.get(SYSTEM_MESSAGE_ID) || null; - } - - const currentAssistantId = alternativeAssistant - ? alternativeAssistant.id - : selectedAssistant?.id; + const currentAssistantId = alternativeAssistantOverride + ? alternativeAssistantOverride.id + : alternativeAssistant + ? alternativeAssistant.id + : liveAssistant.id; resetInputBar(); + let messageUpdates: Message[] | null = null; - setIsStreaming(true); let answer = ""; let query: string | null = null; let retrievalType: RetrievalType = @@ -745,21 +985,37 @@ export function ChatPage({ let documents: DanswerDocument[] = selectedDocuments; let aiMessageImages: FileDescriptor[] | null = null; let error: string | null = null; + let stackTrace: string | null = null; + let finalMessage: BackendMessage | null = null; let toolCalls: ToolCallMetadata[] = []; + let initialFetchDetails: null | { + user_message_id: number; + assistant_message_id: number; + frozenMessageMap: Map; + } = null; + try { + const mapKeys = Array.from( + currentMessageMap(completeMessageDetail).keys() + ); + const systemMessage = Math.min(...mapKeys); + const lastSuccessfulMessageId = - getLastSuccessfulMessageId(currMessageHistory); + getLastSuccessfulMessageId(currMessageHistory) || systemMessage; const stack = new CurrentMessageFIFO(); updateCurrentMessageFIFO(stack, { + signal: controller.signal, // Add this line message: currMessage, alternateAssistantId: currentAssistantId, fileDescriptors: currentMessageFiles, - parentMessageId: lastSuccessfulMessageId, + parentMessageId: + regenerationRequest?.parentMessage.messageId || + lastSuccessfulMessageId, chatSessionId: currChatSessionId, - promptId: livePersona?.prompts[0]?.id || 0, + promptId: liveAssistant?.prompts[0]?.id || 0, filters: buildFilters( filterManager.selectedSources, filterManager.selectedDocumentSets, @@ -774,34 +1030,24 @@ export function ChatPage({ .map((document) => document.db_doc_id as number), queryOverride, forceSearch, - - modelProvider: llmOverrideManager.llmOverride.name || undefined, + regenerate: regenerationRequest !== undefined, + modelProvider: + modelOverRide?.name || + llmOverrideManager.llmOverride.name || + llmOverrideManager.globalDefault.name || + undefined, modelVersion: + modelOverRide?.modelName || llmOverrideManager.llmOverride.modelName || searchParams.get(SEARCH_PARAM_NAMES.MODEL_VERSION) || + llmOverrideManager.globalDefault.modelName || undefined, - temperature: - llmOverrideManager.temperature || - parseFloat(searchParams.get(SEARCH_PARAM_NAMES.TEMPERATURE) || "") || - undefined, + temperature: 
llmOverrideManager.temperature || undefined, systemPromptOverride: searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined, useExistingUserMessage: isSeededChat, }); - const updateFn = (messages: Message[]) => { - const replacementsMap = finalMessage - ? new Map([ - [messages[0].messageId, TEMP_USER_MESSAGE_ID], - [messages[1].messageId, TEMP_ASSISTANT_MESSAGE_ID], - ] as [number, number][]) - : null; - upsertToCompleteMessageMap({ - messages: messages, - replacementsMap: replacementsMap, - completeMessageMapOverride: frozenMessageMap, - chatSessionId: frozenSessionId!, - }); - }; + const delay = (ms: number) => { return new Promise((resolve) => setTimeout(resolve, ms)); }; @@ -812,18 +1058,84 @@ export function ChatPage({ if (!stack.isEmpty()) { const packet = stack.nextPacket(); + if (!packet) { + continue; + } + + if (!initialFetchDetails) { + if (!Object.hasOwn(packet, "user_message_id")) { + console.error( + "First packet should contain message response info " + ); + continue; + } + + const messageResponseIDInfo = packet as MessageResponseIDInfo; + + const user_message_id = messageResponseIDInfo.user_message_id!; + const assistant_message_id = + messageResponseIDInfo.reserved_assistant_message_id; + + // we will use tempMessages until the regenerated message is complete + messageUpdates = [ + { + messageId: regenerationRequest + ? regenerationRequest?.parentMessage?.messageId! + : user_message_id, + message: currMessage, + type: "user", + files: currentMessageFiles, + toolCalls: [], + parentMessageId: parentMessage?.messageId || SYSTEM_MESSAGE_ID, + }, + ]; + + if (parentMessage && !regenerationRequest) { + messageUpdates.push({ + ...parentMessage, + childrenMessageIds: ( + parentMessage.childrenMessageIds || [] + ).concat([user_message_id]), + latestChildMessageId: user_message_id, + }); + } + + const { messageMap: currentFrozenMessageMap } = + upsertToCompleteMessageMap({ + messages: messageUpdates, + chatSessionId: currChatSessionId, + }); + + const frozenMessageMap = currentFrozenMessageMap; + initialFetchDetails = { + frozenMessageMap, + assistant_message_id, + user_message_id, + }; + + resetRegenerationState(); + } else { + const { user_message_id, frozenMessageMap } = initialFetchDetails; + + setChatState((prevState) => { + if (prevState.get(chatSessionIdRef.current!) 
=== "loading") { + return new Map(prevState).set( + chatSessionIdRef.current!, + "streaming" + ); + } + return prevState; + }); - if (packet) { if (Object.hasOwn(packet, "answer_piece")) { answer += (packet as AnswerPiecePacket).answer_piece; } else if (Object.hasOwn(packet, "top_documents")) { documents = (packet as DocumentsResponse).top_documents; - query = (packet as DocumentsResponse).rephrased_query; retrievalType = RetrievalType.Search; if (documents && documents.length > 0) { // point to the latest message (we don't know the messageId yet, which is why // we have to use -1) - setSelectedMessageForDocDisplay(TEMP_USER_MESSAGE_ID); + setSelectedMessageForDocDisplay(user_message_id); } } else if (Object.hasOwn(packet, "tool_name")) { toolCalls = [ @@ -833,6 +1145,20 @@ export function ChatPage({ tool_result: (packet as ToolCallMetadata).tool_result, }, ]; + if ( + !toolCalls[0].tool_result || + toolCalls[0].tool_result == undefined + ) { + updateChatState("toolBuilding", frozenSessionId); + } else { + updateChatState("streaming", frozenSessionId); + } + + // This will be consolidated in upcoming tool calls udpate, + // but for now, we need to set query as early as possible + if (toolCalls[0].tool_name == SEARCH_TOOL_NAME) { + query = toolCalls[0].tool_args["query"]; + } } else if (Object.hasOwn(packet, "file_ids")) { aiMessageImages = (packet as ImageGenerationDisplay).file_ids.map( (fileId) => { @@ -844,27 +1170,57 @@ export function ChatPage({ ); } else if (Object.hasOwn(packet, "error")) { error = (packet as StreamingError).error; + stackTrace = (packet as StreamingError).stack_trace; } else if (Object.hasOwn(packet, "message_id")) { finalMessage = packet as BackendMessage; } - const newUserMessageId = - finalMessage?.parent_message || TEMP_USER_MESSAGE_ID; - const newAssistantMessageId = - finalMessage?.message_id || TEMP_ASSISTANT_MESSAGE_ID; + // on initial message send, we insert a dummy system message + // set this as the parent here if no parent is set + parentMessage = + parentMessage || frozenMessageMap?.get(SYSTEM_MESSAGE_ID)!; + + const updateFn = (messages: Message[]) => { + const replacementsMap = regenerationRequest + ? new Map([ + [ + regenerationRequest?.parentMessage?.messageId, + regenerationRequest?.parentMessage?.messageId, + ], + [ + regenerationRequest?.messageId, + initialFetchDetails?.assistant_message_id, + ], + ] as [number, number][]) + : null; + + return upsertToCompleteMessageMap({ + messages: messages, + replacementsMap: replacementsMap, + completeMessageMapOverride: frozenMessageMap, + chatSessionId: frozenSessionId!, + }); + }; + updateFn([ { - messageId: newUserMessageId, + messageId: regenerationRequest + ? regenerationRequest?.parentMessage?.messageId! + : initialFetchDetails.user_message_id!, message: currMessage, type: "user", files: currentMessageFiles, toolCalls: [], - parentMessageId: parentMessage?.messageId || null, - childrenMessageIds: [newAssistantMessageId], - latestChildMessageId: newAssistantMessageId, + parentMessageId: error ? null : lastSuccessfulMessageId, + childrenMessageIds: [ + ...(regenerationRequest?.parentMessage?.childrenMessageIds || + []), + initialFetchDetails.assistant_message_id!, + ], + latestChildMessageId: initialFetchDetails.assistant_message_id, }, { - messageId: newAssistantMessageId, + messageId: initialFetchDetails.assistant_message_id!, message: error || answer, type: error ? 
"error" : "assistant", retrievalType, @@ -874,15 +1230,15 @@ export function ChatPage({ citations: finalMessage?.citations || {}, files: finalMessage?.files || aiMessageImages || [], toolCalls: finalMessage?.tool_calls || toolCalls, - parentMessageId: newUserMessageId, - alternateAssistantID: selectedAssistant?.id, + parentMessageId: regenerationRequest + ? regenerationRequest?.parentMessage?.messageId! + : initialFetchDetails.user_message_id, + alternateAssistantID: alternativeAssistant?.id, + stackTrace: stackTrace, + overridden_model: finalMessage?.overridden_model, }, ]); } - if (isCancelledRef.current) { - setIsCancelled(false); - break; - } } } } catch (e: any) { @@ -890,7 +1246,8 @@ export function ChatPage({ upsertToCompleteMessageMap({ messages: [ { - messageId: TEMP_USER_MESSAGE_ID, + messageId: + initialFetchDetails?.user_message_id || TEMP_USER_MESSAGE_ID, message: currMessage, type: "user", files: currentMessageFiles, @@ -898,23 +1255,30 @@ export function ChatPage({ parentMessageId: parentMessage?.messageId || SYSTEM_MESSAGE_ID, }, { - messageId: TEMP_ASSISTANT_MESSAGE_ID, + messageId: + initialFetchDetails?.assistant_message_id || + TEMP_ASSISTANT_MESSAGE_ID, message: errorMsg, type: "error", files: aiMessageImages || [], toolCalls: [], - parentMessageId: TEMP_USER_MESSAGE_ID, + parentMessageId: + initialFetchDetails?.user_message_id || TEMP_USER_MESSAGE_ID, }, ], - completeMessageMapOverride: frozenMessageMap, + completeMessageMapOverride: currentMessageMap(completeMessageDetail), }); } - setIsStreaming(false); + resetRegenerationState(currentSessionId()); + + updateChatState("input"); if (isNewSession) { if (finalMessage) { setSelectedMessageForDocDisplay(finalMessage.message_id); } + if (!searchParamBasedChatSessionName) { + await new Promise((resolve) => setTimeout(resolve, 200)); await nameChatSession(currChatSessionId, currMessage); } @@ -971,19 +1335,26 @@ export function ChatPage({ } }; - const onPersonaChange = (persona: Persona | null) => { - if (persona && persona.id !== livePersona.id) { - // remove uploaded files - setCurrentMessageFiles([]); - setSelectedPersona(persona); + const onAssistantChange = (assistant: Persona | null) => { + if (assistant && assistant.id !== liveAssistant.id) { + // Abort the ongoing stream if it exists + if (currentSessionChatState != "input") { + stopGenerating(); + resetInputBar(); + } + textAreaRef.current?.focus(); - router.push(buildChatUrl(searchParams, null, persona.id)); + router.push(buildChatUrl(searchParams, null, assistant.id)); } }; const handleImageUpload = (acceptedFiles: File[]) => { const llmAcceptsImages = checkLLMSupportsImageInput( - ...getFinalLLM(llmProviders, livePersona, llmOverrideManager.llmOverride) + ...getFinalLLM( + llmProviders, + liveAssistant, + llmOverrideManager.llmOverride + ) ); const imageFiles = acceptedFiles.filter((file) => file.type.startsWith("image/") @@ -1035,579 +1406,797 @@ export function ChatPage({ // settings are passed in via Context and therefore aren't // available in server-side components const settings = useContext(SettingsContext); + const enterpriseSettings = settings?.enterpriseSettings; if (settings?.settings?.chat_page_enabled === false) { router.push("/search"); } - const [showDocSidebar, setShowDocSidebar] = useState(true); // State to track if sidebar is open + const [showDocSidebar, setShowDocSidebar] = useState(false); // State to track if sidebar is open - const toggleSidebar = () => { - if (sidebarElementRef.current) { - sidebarElementRef.current.style.transition = 
"width 0.3s ease-in-out"; + // Used to maintain a "time out" for history sidebar so our existing refs can have time to process change + const [untoggled, setUntoggled] = useState(false); - sidebarElementRef.current.style.width = showDocSidebar - ? "0px" - : `${usedSidebarWidth}px`; - } + const explicitlyUntoggle = () => { + setShowDocSidebar(false); + + setUntoggled(true); + setTimeout(() => { + setUntoggled(false); + }, 200); + }; + const toggleSidebar = () => { + Cookies.set( + SIDEBAR_TOGGLED_COOKIE_NAME, + String(!toggledSidebar).toLocaleLowerCase() + ), + { + path: "/", + }; - setShowDocSidebar((showDocSidebar) => !showDocSidebar); // Toggle the state which will in turn toggle the class + toggle(); + }; + const removeToggle = () => { + setShowDocSidebar(false); + toggle(false); }; + const sidebarElementRef = useRef(null); + + useSidebarVisibility({ + toggledSidebar, + sidebarElementRef, + showDocSidebar, + setShowDocSidebar, + setToggled: removeToggle, + mobile: settings?.isMobile, + }); + + useScrollonStream({ + chatState: currentSessionChatState, + scrollableDivRef, + scrollDist, + endDivRef, + distance, + debounce, + }); + useEffect(() => { const includes = checkAnyAssistantHasSearch( messageHistory, - availablePersonas, - livePersona + availableAssistants, + liveAssistant ); setRetrievalEnabled(includes); - }, [messageHistory, availablePersonas, livePersona]); + }, [messageHistory, availableAssistants, liveAssistant]); const [retrievalEnabled, setRetrievalEnabled] = useState(() => { return checkAnyAssistantHasSearch( messageHistory, - availablePersonas, - livePersona + availableAssistants, + liveAssistant ); }); - const [editingRetrievalEnabled, setEditingRetrievalEnabled] = useState(false); - const sidebarElementRef = useRef(null); + const [stackTraceModalContent, setStackTraceModalContent] = useState< + string | null + >(null); + const innerSidebarElementRef = useRef(null); + const [settingsToggled, setSettingsToggled] = useState(false); - const currentPersona = selectedAssistant || livePersona; + const currentPersona = alternativeAssistant || liveAssistant; - const updateSelectedAssistant = (newAssistant: Persona | null) => { - setSelectedAssistant(newAssistant); - if (newAssistant) { - setEditingRetrievalEnabled(personaIncludesRetrieval(newAssistant)); - } else { - setEditingRetrievalEnabled(false); - } + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.metaKey || event.ctrlKey) { + switch (event.key.toLowerCase()) { + case "e": + event.preventDefault(); + toggleSidebar(); + break; + } + } + }; + + window.addEventListener("keydown", handleKeyDown); + return () => { + window.removeEventListener("keydown", handleKeyDown); + }; + }, [router]); + const [sharedChatSession, setSharedChatSession] = + useState(); + const [deletingChatSession, setDeletingChatSession] = + useState(); + + const showDeleteModal = (chatSession: ChatSession) => { + setDeletingChatSession(chatSession); + }; + const showShareModal = (chatSession: ChatSession) => { + setSharedChatSession(chatSession); }; - console.log(hasPerformedInitialScroll); + const [documentSelection, setDocumentSelection] = useState(false); + const toggleDocumentSelectionAspects = () => { + setDocumentSelection((documentSelection) => !documentSelection); + setShowDocSidebar(false); + }; + const secondsUntilExpiration = getSecondsUntilExpiration(user); + + interface RegenerationRequest { + messageId: number; + parentMessage: Message; + } + + function createRegenerator(regenerationRequest: 
RegenerationRequest) { + // Returns new function that only needs `modelOverRide` to be specified when called + return async function (modelOverRide: LlmOverride) { + return await onSubmit({ + modelOverRide, + messageIdToResend: regenerationRequest.parentMessage.messageId, + regenerationRequest, + }); + }; + } + return ( <> - - - + {/* ChatPopup is a custom popup that displays a admin-specified message on initial user visit. Only used in the EE version of the app. */} + {popup} + + {currentFeedback && ( + setCurrentFeedback(null)} + onSubmit={({ message, predefinedFeedback }) => { + onFeedback( + currentFeedback[1], + currentFeedback[0], + message, + predefinedFeedback + ); + setCurrentFeedback(null); + }} + /> + )} + + {settingsToggled && ( + setSettingsToggled(false)} + /> + )} + + {deletingChatSession && ( + setDeletingChatSession(null)} + onSubmit={async () => { + const response = await deleteChatSession(deletingChatSession.id); + if (response.ok) { + setDeletingChatSession(null); + // go back to the main page + router.push("/chat"); + } else { + alert("Failed to delete chat session"); + } + }} + /> + )} -
    - setStackTraceModalContent(null)} + exceptionTrace={stackTraceModalContent} + /> + )} + + {sharedChatSession && ( + setSharedChatSession(null)} + onShare={(shared) => + setChatSessionSharedStatus( + shared + ? ChatSessionSharedStatus.Public + : ChatSessionSharedStatus.Private + ) + } + /> + )} + {sharingModalVisible && chatSessionIdRef.current !== null && ( + setSharingModalVisible(false)} /> + )} +
    +
    +
    +
    +
    + setMessage("")} + page="chat" + ref={innerSidebarElementRef} + toggleSidebar={toggleSidebar} + toggled={toggledSidebar && !settings?.isMobile} + existingChats={chatSessions} + currentChatSession={selectedChatSession} + folders={folders} + openedFolders={openedFolders} + removeToggle={removeToggle} + showShareModal={showShareModal} + showDeleteModal={showDeleteModal} + /> +
    +
    +
    + +
    +
    + {liveAssistant && ( + setMessage("")} + page="chat" + setSharingModalVisible={ + chatSessionIdRef.current !== null + ? setSharingModalVisible + : undefined + } + toggleSidebar={toggleSidebar} + user={user} + currentChatSession={selectedChatSession} + /> + )} -
    - {popup} - {currentFeedback && ( - setCurrentFeedback(null)} - onSubmit={({ message, predefinedFeedback }) => { - onFeedback( - currentFeedback[1], - currentFeedback[0], - message, - predefinedFeedback - ); - setCurrentFeedback(null); - }} - /> - )} - - {sharingModalVisible && chatSessionIdRef.current !== null && ( - setSharingModalVisible(false)} - onShare={(shared) => - setChatSessionSharedStatus( - shared - ? ChatSessionSharedStatus.Public - : ChatSessionSharedStatus.Private - ) - } - /> - )} - - setConfigModalActiveTab(null)} - filterManager={filterManager} - availableAssistants={filteredAssistants} - selectedAssistant={livePersona} - setSelectedAssistant={onPersonaChange} - llmProviders={llmProviders} - llmOverrideManager={llmOverrideManager} - /> - - {documentSidebarInitialWidth !== undefined ? ( - - {({ getRootProps }) => ( - <> -
    + {({ getRootProps }) => ( +
    + {!settings?.isMobile && ( +
    - {/* */} - -
    - {/* ChatBanner is a custom banner that displays a admin-specified message at - the top of the chat page. Only used in the EE version of the app. */} - - - {livePersona && ( -
    -
    -
    - -
    - -
    - {chatSessionIdRef.current !== null && ( -
    setSharingModalVisible(true)} - className={` - my-auto - p-2 - rounded - cursor-pointer - hover:bg-hover-light - `} - > - -
    - )} - -
    - - {retrievalEnabled && !showDocSidebar && ( - - )} -
    -
    -
    -
    + >
    )} - {messageHistory.length === 0 && - !isFetchingChatMessages && - !isStreaming && ( - - )} -
    - {messageHistory.map((message, i) => { - const messageMap = completeMessageDetail.messageMap; - const messageReactComponentKey = `${i}-${completeMessageDetail.sessionId}`; - if (message.type === "user") { - const parentMessage = message.parentMessageId - ? messageMap.get(message.parentMessageId) - : null; - return ( -
    - { - const parentMessageId = - message.parentMessageId!; - const parentMessage = - messageMap.get(parentMessageId)!; - upsertToCompleteMessageMap({ - messages: [ - { - ...parentMessage, - latestChildMessageId: null, - }, - ], - }); - onSubmit({ - messageIdToResend: - message.messageId || undefined, - messageOverride: editedContent, - }); - }} - onMessageSelection={(messageId) => { - const newCompleteMessageMap = new Map( - messageMap - ); - newCompleteMessageMap.get( - message.parentMessageId! - )!.latestChildMessageId = messageId; - setCompleteMessageDetail({ - sessionId: - completeMessageDetail.sessionId, - messageMap: newCompleteMessageMap, - }); - setSelectedMessageForDocDisplay(messageId); - // set message as latest so we can edit this message - // and so it sticks around on page reload - setMessageAsLatest(messageId); - }} - /> -
    - ); - } else if (message.type === "assistant") { - const isShowingRetrieved = - (selectedMessageForDocDisplay !== null && - selectedMessageForDocDisplay === - message.messageId) || - (selectedMessageForDocDisplay === - TEMP_USER_MESSAGE_ID && - i === messageHistory.length - 1); - const previousMessage = - i !== 0 ? messageHistory[i - 1] : null; - - const currentAlternativeAssistant = - message.alternateAssistantID != null - ? availablePersonas.find( - (persona) => - persona.id == message.alternateAssistantID - ) + {/* */} +
+ {/* ChatBanner is a custom banner that displays an admin-specified message at + the top of the chat page. Only used in the EE version of the app. */} + + {messageHistory.length === 0 && + !isFetchingChatMessages && + currentSessionChatState == "input" && ( + + )} +
    + {messageHistory.map((message, i) => { + const messageMap = currentMessageMap( + completeMessageDetail + ); + const messageReactComponentKey = `${i}-${currentSessionId()}`; + const parentMessage = message.parentMessageId + ? messageMap.get(message.parentMessageId) : null; - - return ( -
    = + currentSessionRegenerationState?.finalMessageIndex! + ) { + return <>; + } + + if (message.type === "user") { + return ( +
    + { + const parentMessageId = + message.parentMessageId!; + const parentMessage = + messageMap.get(parentMessageId)!; + upsertToCompleteMessageMap({ + messages: [ + { + ...parentMessage, + latestChildMessageId: null, + }, + ], + }); + onSubmit({ + messageIdToResend: + message.messageId || undefined, + messageOverride: editedContent, + }); + }} + otherMessagesCanSwitchTo={ + parentMessage?.childrenMessageIds || [] + } + onMessageSelection={(messageId) => { + const newCompleteMessageMap = new Map( + messageMap + ); + newCompleteMessageMap.get( + message.parentMessageId! + )!.latestChildMessageId = messageId; + updateCompleteMessageDetail( + currentSessionId(), + newCompleteMessageMap + ); + setSelectedMessageForDocDisplay( + messageId + ); + // set message as latest so we can edit this message + // and so it sticks around on page reload + setMessageAsLatest(messageId); + }} + /> +
    + ); + } else if (message.type === "assistant") { + const isShowingRetrieved = + (selectedMessageForDocDisplay !== null && + selectedMessageForDocDisplay === + message.messageId) || + i === messageHistory.length - 1; + const previousMessage = + i !== 0 ? messageHistory[i - 1] : null; + + const currentAlternativeAssistant = + message.alternateAssistantID != null + ? availableAssistants.find( + (persona) => + persona.id == + message.alternateAssistantID + ) + : null; + + if ( + currentSessionRegenerationState?.regenerating && + currentSessionChatState == "loading" && + message.messageId == messageHistory.length - 1 + ) { + return <>; } - > - 0) === true - } - handleFeedback={ - i === messageHistory.length - 1 && - isStreaming - ? undefined - : (feedbackType) => - setCurrentFeedback([ - feedbackType, - message.messageId as number, - ]) - } - handleSearchQueryEdit={ - i === messageHistory.length - 1 && - !isStreaming - ? (newQuery) => { - if (!previousMessage) { - setPopup({ - type: "error", - message: - "Cannot edit query of first message - please refresh the page and try again.", - }); - return; - } + return ( +
    + { + const newCompleteMessageMap = new Map( + messageMap + ); + newCompleteMessageMap.get( + message.parentMessageId! + )!.latestChildMessageId = messageId; + + updateCompleteMessageDetail( + currentSessionId(), + newCompleteMessageMap + ); - if ( - previousMessage.messageId === null - ) { - setPopup({ - type: "error", - message: - "Cannot edit query of a pending message - please wait a few seconds and try again.", - }); - return; + setSelectedMessageForDocDisplay( + messageId + ); + // set message as latest so we can edit this message + // and so it sticks around on page reload + setMessageAsLatest(messageId); + }} + isActive={messageHistory.length - 1 == i} + selectedDocuments={selectedDocuments} + toggleDocumentSelection={ + toggleDocumentSelectionAspects + } + docs={message.documents} + currentPersona={liveAssistant} + alternativeAssistant={ + currentAlternativeAssistant + } + messageId={message.messageId} + content={message.message} + // content={message.message} + files={message.files} + query={ + messageHistory[i]?.query || undefined + } + personaName={liveAssistant.name} + citedDocuments={getCitedDocumentsFromMessage( + message + )} + toolCall={ + message.toolCalls && + message.toolCalls[0] + } + isComplete={ + i !== messageHistory.length - 1 || + (currentSessionChatState != + "streaming" && + currentSessionChatState != + "toolBuilding") + } + hasDocs={ + (message.documents && + message.documents.length > 0) === true + } + handleFeedback={ + i === messageHistory.length - 1 && + currentSessionChatState != "input" + ? undefined + : (feedbackType) => + setCurrentFeedback([ + feedbackType, + message.messageId as number, + ]) + } + handleSearchQueryEdit={ + i === messageHistory.length - 1 && + currentSessionChatState == "input" + ? (newQuery) => { + if (!previousMessage) { + setPopup({ + type: "error", + message: + "Cannot edit query of first message - please refresh the page and try again.", + }); + return; + } + if ( + previousMessage.messageId === + null + ) { + setPopup({ + type: "error", + message: + "Cannot edit query of a pending message - please wait a few seconds and try again.", + }); + return; + } + onSubmit({ + messageIdToResend: + previousMessage.messageId, + queryOverride: newQuery, + alternativeAssistantOverride: + currentAlternativeAssistant, + }); + } + : undefined + } + isCurrentlyShowingRetrieved={ + isShowingRetrieved + } + handleShowRetrieved={(messageNumber) => { + if (isShowingRetrieved) { + setSelectedMessageForDocDisplay(null); + } else { + if (messageNumber !== null) { + setSelectedMessageForDocDisplay( + messageNumber + ); + } else { + setSelectedMessageForDocDisplay(-1); } + } + }} + handleForceSearch={() => { + if ( + previousMessage && + previousMessage.messageId + ) { onSubmit({ messageIdToResend: previousMessage.messageId, - queryOverride: newQuery, - alternativeAssistant: + forceSearch: true, + alternativeAssistantOverride: currentAlternativeAssistant, }); + } else { + setPopup({ + type: "error", + message: + "Failed to force search - please refresh the page and try again.", + }); } - : undefined - } - isCurrentlyShowingRetrieved={ - isShowingRetrieved - } - handleShowRetrieved={(messageNumber) => { - if (isShowingRetrieved) { - setSelectedMessageForDocDisplay(null); - } else { - if (messageNumber !== null) { - setSelectedMessageForDocDisplay( - messageNumber - ); - } else { - setSelectedMessageForDocDisplay(-1); + }} + retrievalDisabled={ + currentAlternativeAssistant + ? !personaIncludesRetrieval( + currentAlternativeAssistant! 
+ ) + : !retrievalEnabled } - } - }} - handleForceSearch={() => { - if ( - previousMessage && - previousMessage.messageId - ) { - onSubmit({ - messageIdToResend: - previousMessage.messageId, - forceSearch: true, - alternativeAssistant: - currentAlternativeAssistant, - }); - } else { - setPopup({ - type: "error", - message: - "Failed to force search - please refresh the page and try again.", - }); - } - }} - retrievalDisabled={ - currentAlternativeAssistant - ? !personaIncludesRetrieval( - currentAlternativeAssistant! - ) - : !retrievalEnabled - } + /> +
    + ); + } else { + return ( +
    + + {message.message} + {message.stackTrace && ( + + setStackTraceModalContent( + message.stackTrace! + ) + } + className="ml-2 cursor-pointer underline" + > + Show stack trace. + + )} +

    + } + /> +
    + ); + } + })} + + {currentSessionChatState == "loading" && + !currentSessionRegenerationState?.regenerating && + messageHistory[messageHistory.length - 1]?.type != + "user" && ( + -
    - ); - } else { - return ( -
    + )} + + {currentSessionChatState == "loading" && ( +
    - {message.message} -

    +
    + + Thinking... + +
    } />
    - ); - } - })} - {isStreaming && - messageHistory.length > 0 && - messageHistory[messageHistory.length - 1].type === - "user" && ( -
    - - -
    - } - /> -
    - )} - - {/* Some padding at the bottom so the search bar has space at the bottom to not cover the last message*/} -
    -
    - - {currentPersona && - currentPersona.starter_messages && - currentPersona.starter_messages.length > 0 && - selectedPersona && - messageHistory.length === 0 && - !isFetchingChatMessages && ( -
    - {currentPersona.starter_messages.map( - (starterMessage, i) => ( -
    - - onSubmit({ - messageOverride: - starterMessage.message, - }) + )} + + {currentPersona && + currentPersona.starter_messages && + currentPersona.starter_messages.length > 0 && + selectedAssistant && + messageHistory.length === 0 && + !isFetchingChatMessages && ( +
    + {currentPersona.starter_messages.map( + (starterMessage, i) => ( +
    + + onSubmit({ + messageOverride: + starterMessage.message, + }) + } + /> +
    + ) + )} +
    + )} + {/* Some padding at the bottom so the search bar has space at the bottom to not cover the last message*/} +
    +
    +
    +
    +
    +
    + {aboveHorizon && ( +
    + +
    + )} + setSettingsToggled(true)} + inputPrompts={userInputPrompts} + showDocs={() => setDocumentSelection(true)} + selectedDocuments={selectedDocuments} + // assistant stuff + assistantOptions={filteredAssistants} + selectedAssistant={liveAssistant} + setSelectedAssistant={onAssistantChange} + setAlternativeAssistant={setAlternativeAssistant} + alternativeAssistant={alternativeAssistant} + // end assistant stuff + message={message} + setMessage={setMessage} + onSubmit={onSubmit} + filterManager={filterManager} + llmOverrideManager={llmOverrideManager} + files={currentMessageFiles} + setFiles={setCurrentMessageFiles} + handleFileUpload={handleImageUpload} + textAreaRef={textAreaRef} + chatSessionId={chatSessionIdRef.current!} + /> + + {enterpriseSettings && + enterpriseSettings.custom_lower_disclaimer_content && ( +
    +
    +
    - ) +
    )} -
    - )} -
    -
    -
    -
    -
    - {aboveHorizon && ( -
    - + {enterpriseSettings && + enterpriseSettings.use_custom_logotype && ( +
    + logotype +
    + )}
    - )} - - { - updateSelectedAssistant(alternativeAssistant); - }} - alternativeAssistant={selectedAssistant} - personas={filteredAssistants} - message={message} - setMessage={setMessage} - onSubmit={onSubmit} - isStreaming={isStreaming} - setIsCancelled={setIsCancelled} - retrievalDisabled={ - !personaIncludesRetrieval(currentPersona) - } - filterManager={filterManager} - llmOverrideManager={llmOverrideManager} - selectedAssistant={livePersona} - files={currentMessageFiles} - setFiles={setCurrentMessageFiles} - handleFileUpload={handleImageUpload} - setConfigModalActiveTab={setConfigModalActiveTab} - textAreaRef={textAreaRef} - /> +
    + )} + + ) : ( +
    +
    +
    +
    - - {retrievalEnabled || editingRetrievalEnabled ? ( -
    - - toggleSidebar()} - selectedMessage={aiMessage} - selectedDocuments={selectedDocuments} - toggleDocumentSelection={toggleDocumentSelection} - clearSelectedDocuments={clearSelectedDocuments} - selectedDocumentTokens={selectedDocumentTokens} - maxTokens={maxTokens} - isLoading={isFetchingChatMessages} - /> - -
    - ) : // Another option is to use a div with the width set to the initial width, so that the - // chat section appears in the same place as before - //
    - null} - +
    )} - - ) : ( -
    -
    - -
    - )} +
    +
    + setDocumentSelection(false)} + selectedMessage={aiMessage} + selectedDocuments={selectedDocuments} + toggleDocumentSelection={toggleDocumentSelection} + clearSelectedDocuments={clearSelectedDocuments} + selectedDocumentTokens={selectedDocumentTokens} + maxTokens={maxTokens} + isLoading={isFetchingChatMessages} + isOpen={documentSelection} + /> ); } diff --git a/web/src/app/chat/RegenerateOption.tsx b/web/src/app/chat/RegenerateOption.tsx new file mode 100644 index 00000000000..7c0f97676d2 --- /dev/null +++ b/web/src/app/chat/RegenerateOption.tsx @@ -0,0 +1,184 @@ +import { useChatContext } from "@/components/context/ChatContext"; +import { + getDisplayNameForModel, + LlmOverride, + useLlmOverride, +} from "@/lib/hooks"; +import { + DefaultDropdownElement, + StringOrNumberOption, +} from "@/components/Dropdown"; + +import { Persona } from "@/app/admin/assistants/interfaces"; +import { destructureValue, getFinalLLM, structureValue } from "@/lib/llm/utils"; +import { useState } from "react"; +import { Hoverable } from "@/components/Hoverable"; +import { Popover } from "@/components/popover/Popover"; +import { FiStar } from "react-icons/fi"; +import { StarFeedback } from "@/components/icons/icons"; +import { IconType } from "react-icons"; + +export function RegenerateDropdown({ + options, + selected, + onSelect, + side, + maxHeight, + alternate, +}: { + alternate?: string; + options: StringOrNumberOption[]; + selected: string | null; + onSelect: (value: string | number | null) => void; + includeDefault?: boolean; + side?: "top" | "right" | "bottom" | "left"; + maxHeight?: string; +}) { + const [isOpen, setIsOpen] = useState(false); + + const Dropdown = ( +
    +

    + Pick a model +

    + {options.map((option, ind) => { + const isSelected = option.value === selected; + return ( + onSelect(option.value)} + isSelected={isSelected} + /> + ); + })} +
    + ); + + return ( + setIsOpen(open)} + content={ +
    setIsOpen(!isOpen)}> + {!alternate ? ( + + ) : ( + + )} +
    + } + popover={Dropdown} + align="start" + side={side} + sideOffset={5} + triggerMaxWidth + /> + ); +} + +export default function RegenerateOption({ + selectedAssistant, + regenerate, + overriddenModel, + onHoverChange, +}: { + selectedAssistant: Persona; + regenerate: (modelOverRide: LlmOverride) => Promise; + overriddenModel?: string; + onHoverChange: (isHovered: boolean) => void; +}) { + const llmOverrideManager = useLlmOverride(); + + const { llmProviders } = useChatContext(); + const [_, llmName] = getFinalLLM(llmProviders, selectedAssistant, null); + + const llmOptionsByProvider: { + [provider: string]: { name: string; value: string }[]; + } = {}; + const uniqueModelNames = new Set(); + + llmProviders.forEach((llmProvider) => { + if (!llmOptionsByProvider[llmProvider.provider]) { + llmOptionsByProvider[llmProvider.provider] = []; + } + + (llmProvider.display_model_names || llmProvider.model_names).forEach( + (modelName) => { + if (!uniqueModelNames.has(modelName)) { + uniqueModelNames.add(modelName); + llmOptionsByProvider[llmProvider.provider].push({ + name: modelName, + value: structureValue( + llmProvider.name, + llmProvider.provider, + modelName + ), + }); + } + } + ); + }); + + const llmOptions = Object.entries(llmOptionsByProvider).flatMap( + ([provider, options]) => [...options] + ); + + const currentModelName = + llmOverrideManager?.llmOverride.modelName || + (selectedAssistant + ? selectedAssistant.llm_model_version_override || llmName + : llmName); + + return ( +
    onHoverChange(true)} + onMouseLeave={() => onHoverChange(false)} + > + { + const { name, provider, modelName } = destructureValue( + value as string + ); + regenerate({ + name: name, + provider: provider, + modelName: modelName, + }); + }} + /> +
    + ); +} diff --git a/web/src/app/chat/WrappedChat.tsx b/web/src/app/chat/WrappedChat.tsx new file mode 100644 index 00000000000..cdb8508dfb0 --- /dev/null +++ b/web/src/app/chat/WrappedChat.tsx @@ -0,0 +1,24 @@ +"use client"; +import { ChatPage } from "./ChatPage"; +import FunctionalWrapper from "./shared_chat_search/FunctionalWrapper"; + +export default function WrappedChat({ + defaultAssistantId, + initiallyToggled, +}: { + defaultAssistantId?: number; + initiallyToggled: boolean; +}) { + return ( + ( + + )} + /> + ); +} diff --git a/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx b/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx index 00b3b13cc0b..2c6acf3710b 100644 --- a/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx +++ b/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx @@ -1,8 +1,6 @@ import { HoverPopup } from "@/components/HoverPopup"; import { SourceIcon } from "@/components/SourceIcon"; import { PopupSpec } from "@/components/admin/connectors/Popup"; -import { DocumentFeedbackBlock } from "@/components/search/DocumentFeedbackBlock"; -import { DocumentUpdatedAtBadge } from "@/components/search/DocumentUpdatedAtBadge"; import { DanswerDocument } from "@/lib/search/interfaces"; import { FiInfo, FiRadio } from "react-icons/fi"; import { DocumentSelector } from "./DocumentSelector"; @@ -32,25 +30,34 @@ export function ChatDocumentDisplay({ tokenLimitReached, }: DocumentDisplayProps) { const isInternet = document.is_internet; + // Consider reintroducing null scored docs in the future + + if (document.score === null) { + return null; + } return ( -
    -
    +
    +
    {isInternet ? ( ) : ( )} -

    +

    {document.semantic_identifier || document.document_id}

    @@ -75,6 +82,21 @@ export function ChatDocumentDisplay({ />
    )} +
    + {Math.abs(document.score).toFixed(2)} +
    )} @@ -91,8 +113,9 @@ export function ChatDocumentDisplay({
    -

    +

{buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}

    {/* diff --git a/web/src/app/chat/documentSidebar/DocumentSelector.tsx b/web/src/app/chat/documentSidebar/DocumentSelector.tsx index 833c6a7cadd..2153ce5bdc7 100644 --- a/web/src/app/chat/documentSidebar/DocumentSelector.tsx +++ b/web/src/app/chat/documentSidebar/DocumentSelector.tsx @@ -32,9 +32,8 @@ export function DocumentSelector({ } onClick={onClick} > -

    Select

    ; - closeHeader?: () => void; -}) { - return ( -
    -
    -

    - {icon({ className: "my-auto mr-1" })} - {name} -

    - {closeHeader && ( - - )} -
    -
    - ); -} - interface DocumentSidebarProps { closeSidebar: () => void; selectedMessage: Message | null; @@ -50,6 +16,7 @@ interface DocumentSidebarProps { maxTokens: number; isLoading: boolean; initialWidth: number; + isOpen: boolean; } export const DocumentSidebar = forwardRef( @@ -64,13 +31,12 @@ export const DocumentSidebar = forwardRef( maxTokens, isLoading, initialWidth, + isOpen, }, ref: ForwardedRef ) => { const { popup, setPopup } = usePopup(); - const selectedMessageRetrievalType = selectedMessage?.retrievalType || null; - const selectedDocumentIds = selectedDocuments?.map((document) => document.document_id) || []; @@ -86,70 +52,76 @@ export const DocumentSidebar = forwardRef( return (
    { + if (e.target === e.currentTarget) { + closeSidebar(); + } + }} >
    - {popup} - -
    -
    - +
    + {popup} +
    + {dedupedDocuments.length} Documents +

    + Select to add to continuous context + + Learn more + +

    + + {currentDocuments ? ( -
    -
    - {dedupedDocuments.length > 0 ? ( - dedupedDocuments.map((document, ind) => ( -
    - { - toggleDocumentSelection( - dedupedDocuments.find( - (document) => - document.document_id === documentId - )! - ); - }} - tokenLimitReached={tokenLimitReached} - /> -
    - )) - ) : ( -
    - No documents found for the query. +
    + {dedupedDocuments.length > 0 ? ( + dedupedDocuments.map((document, ind) => ( +
    + { + toggleDocumentSelection( + dedupedDocuments.find( + (document) => document.document_id === documentId + )! + ); + }} + tokenLimitReached={tokenLimitReached} + />
    - )} -
    + )) + ) : ( +
    + No documents found for the query. +
    + )}
    ) : ( !isLoading && ( @@ -163,84 +135,25 @@ export const DocumentSidebar = forwardRef( )}
    -
    -
    -
    - - {tokenLimitReached && ( -
    -
    - - } - popupContent={ - - Over LLM context length by:{" "} - {selectedDocumentTokens - maxTokens} tokens -
    -
    - {selectedDocuments && - selectedDocuments.length > 0 && ( - <> - Truncating: " - - { - selectedDocuments[ - selectedDocuments.length - 1 - ].semantic_identifier - } - - " - - )} -
    - } - direction="left" - /> -
    -
    - )} -
    - {selectedDocuments && selectedDocuments.length > 0 && ( -
    - - De-Select All - -
    - )} -
    +
    +
    + - {selectedDocuments && selectedDocuments.length > 0 ? ( -
    - {selectedDocuments.map((document) => ( - { - toggleDocumentSelection( - dedupedDocuments.find( - (document) => document.document_id === documentId - )! - ); - }} - /> - ))} -
    - ) : ( - !isLoading && ( - - Select documents from the retrieved documents section to chat - specifically with them! - - ) - )} +
    diff --git a/web/src/app/chat/files/InputBarPreview.tsx b/web/src/app/chat/files/InputBarPreview.tsx index 8eee7bbf9cd..5d473b9f2dc 100644 --- a/web/src/app/chat/files/InputBarPreview.tsx +++ b/web/src/app/chat/files/InputBarPreview.tsx @@ -1,8 +1,9 @@ -import { useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { ChatFileType, FileDescriptor } from "../interfaces"; -import { DocumentPreview } from "./documents/DocumentPreview"; + +import { FiX, FiLoader, FiFileText } from "react-icons/fi"; import { InputBarPreviewImage } from "./images/InputBarPreviewImage"; -import { FiX, FiLoader } from "react-icons/fi"; +import { Tooltip } from "@/components/tooltip/Tooltip"; function DeleteButton({ onDelete }: { onDelete: () => void }) { return ( @@ -15,7 +16,7 @@ function DeleteButton({ onDelete }: { onDelete: () => void }) { cursor-pointer border-none bg-hover - p-1 + p-.5 rounded-full z-10 " @@ -25,6 +26,45 @@ function DeleteButton({ onDelete }: { onDelete: () => void }) { ); } +export function InputBarPreviewImageProvider({ + file, + onDelete, + isUploading, +}: { + file: FileDescriptor; + onDelete: () => void; + isUploading: boolean; +}) { + const [isHovered, setIsHovered] = useState(false); + + return ( +
    setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + > + {isHovered && } + {isUploading && ( +
    + +
    + )} + +
    + ); +} + export function InputBarPreview({ file, onDelete, @@ -36,12 +76,16 @@ export function InputBarPreview({ }) { const [isHovered, setIsHovered] = useState(false); - const renderContent = () => { - if (file.type === ChatFileType.IMAGE) { - return ; + const fileNameRef = useRef(null); + const [isOverflowing, setIsOverflowing] = useState(false); + + useEffect(() => { + if (fileNameRef.current) { + setIsOverflowing( + fileNameRef.current.scrollWidth > fileNameRef.current.clientWidth + ); } - return ; - }; + }, [file.name]); return (
    setIsHovered(true)} onMouseLeave={() => setIsHovered(false)} > - {isHovered && } {isUploading && (
    )} - {renderContent()} +
    +
    +
    + +
    +
    +
    + +
    + {file.name} +
    +
    +
    + +
    ); } diff --git a/web/src/app/chat/files/documents/DocumentPreview.tsx b/web/src/app/chat/files/documents/DocumentPreview.tsx index 7e584f34d39..07dddd53cfd 100644 --- a/web/src/app/chat/files/documents/DocumentPreview.tsx +++ b/web/src/app/chat/files/documents/DocumentPreview.tsx @@ -5,9 +5,11 @@ import { Tooltip } from "@/components/tooltip/Tooltip"; export function DocumentPreview({ fileName, maxWidth, + alignBubble, }: { fileName: string; maxWidth?: string; + alignBubble?: boolean; }) { const [isOverflowing, setIsOverflowing] = useState(false); const fileNameRef = useRef(null); @@ -22,7 +24,8 @@ export function DocumentPreview({ return (
    @@ -65,3 +68,69 @@ export function DocumentPreview({
    ); } + +export function InputDocumentPreview({ + fileName, + maxWidth, + alignBubble, +}: { + fileName: string; + maxWidth?: string; + alignBubble?: boolean; +}) { + const [isOverflowing, setIsOverflowing] = useState(false); + const fileNameRef = useRef(null); + + useEffect(() => { + if (fileNameRef.current) { + setIsOverflowing( + fileNameRef.current.scrollWidth > fileNameRef.current.clientWidth + ); + } + }, [fileName]); + + return ( +
    +
    +
    + +
    +
    +
    + +
    + {fileName} +
    +
    +
    +
    + ); +} diff --git a/web/src/app/chat/files/images/InMessageImage.tsx b/web/src/app/chat/files/images/InMessageImage.tsx index 75b58cbed94..6af28350b5b 100644 --- a/web/src/app/chat/files/images/InMessageImage.tsx +++ b/web/src/app/chat/files/images/InMessageImage.tsx @@ -1,11 +1,10 @@ -"use client"; - import { useState } from "react"; import { FullImageModal } from "./FullImageModal"; import { buildImgUrl } from "./utils"; export function InMessageImage({ fileId }: { fileId: string }) { const [fullImageShowing, setFullImageShowing] = useState(false); + const [imageLoaded, setImageLoaded] = useState(false); return ( <> @@ -15,19 +14,23 @@ export function InMessageImage({ fileId }: { fileId: string }) { onOpenChange={(open) => setFullImageShowing(open)} /> - setFullImageShowing(true)} - src={buildImgUrl(fileId)} - loading="lazy" - /> +
    + {!imageLoaded && ( +
    + )} + + Chat Message Image setImageLoaded(true)} + className={`object-contain object-left overflow-hidden rounded-lg w-full h-full max-w-96 max-h-96 transition-opacity duration-300 + ${imageLoaded ? "opacity-100" : "opacity-0"}`} + onClick={() => setFullImageShowing(true)} + src={buildImgUrl(fileId)} + loading="lazy" + /> +
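This InMessageImage hunk keeps a placeholder visible until the browser fires the image's load event, then fades the real image in with an opacity transition. A minimal standalone version of that pattern; the component name and exact utility classes are illustrative, not taken from the patch:

```tsx
import { useState } from "react";

// Swap a placeholder for the real image once onLoad fires, then fade it in.
export function FadeInImage({ src, alt }: { src: string; alt: string }) {
  const [loaded, setLoaded] = useState(false);

  return (
    <div className="relative max-w-96 max-h-96">
      {!loaded && (
        <div className="absolute inset-0 rounded-lg bg-background-200 animate-pulse" />
      )}
      <img
        src={src}
        alt={alt}
        loading="lazy"
        onLoad={() => setLoaded(true)}
        onError={() => setLoaded(true)} // illustrative: avoid a stuck placeholder on failure
        className={`object-contain rounded-lg w-full h-full transition-opacity duration-300 ${
          loaded ? "opacity-100" : "opacity-0"
        }`}
      />
    </div>
  );
}
```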
    ); } diff --git a/web/src/app/chat/files/images/InputBarPreviewImage.tsx b/web/src/app/chat/files/images/InputBarPreviewImage.tsx index 372d0be60fe..51260af1d2e 100644 --- a/web/src/app/chat/files/images/InputBarPreviewImage.tsx +++ b/web/src/app/chat/files/images/InputBarPreviewImage.tsx @@ -14,10 +14,24 @@ export function InputBarPreviewImage({ fileId }: { fileId: string }) { open={fullImageShowing} onOpenChange={(open) => setFullImageShowing(open)} /> -
    +
    setFullImageShowing(true)} - className="h-16 w-16 object-cover rounded-lg bg-background cursor-pointer" + className="h-8 w-8 object-cover rounded-lg bg-background cursor-pointer" src={buildImgUrl(fileId)} />
    diff --git a/web/src/app/chat/folders/FolderList.tsx b/web/src/app/chat/folders/FolderList.tsx index 8fa0f05a484..734f9bf6a0d 100644 --- a/web/src/app/chat/folders/FolderList.tsx +++ b/web/src/app/chat/folders/FolderList.tsx @@ -1,6 +1,6 @@ "use client"; -import React, { useState, useEffect } from "react"; +import React, { useState, useEffect, useRef } from "react"; import { Folder } from "./interfaces"; import { ChatSessionDisplay } from "../sessionSidebar/ChatSessionDisplay"; // Ensure this is correctly imported import { @@ -22,18 +22,20 @@ import { usePopup } from "@/components/admin/connectors/Popup"; import { useRouter } from "next/navigation"; import { CHAT_SESSION_ID_KEY } from "@/lib/drag/constants"; import Cookies from "js-cookie"; - +import { Popover } from "@/components/popover/Popover"; const FolderItem = ({ folder, currentChatId, isInitiallyExpanded, + initiallySelected, }: { folder: Folder; currentChatId?: number; isInitiallyExpanded: boolean; + initiallySelected: boolean; }) => { const [isExpanded, setIsExpanded] = useState(isInitiallyExpanded); - const [isEditing, setIsEditing] = useState(false); + const [isEditing, setIsEditing] = useState(initiallySelected); const [editedFolderName, setEditedFolderName] = useState( folder.folder_name ); @@ -54,6 +56,8 @@ const FolderItem = ({ if (newIsExpanded) { openedFolders[folder.folder_id] = true; } else { + setShowDeleteConfirm(false); + delete openedFolders[folder.folder_id]; } Cookies.set("openedFolders", JSON.stringify(openedFolders)); @@ -77,28 +81,67 @@ const FolderItem = ({ } }; - const saveFolderName = async () => { + const saveFolderName = async (continueEditing?: boolean) => { try { await updateFolderName(folder.folder_id, editedFolderName); - setIsEditing(false); + if (!continueEditing) { + setIsEditing(false); + } router.refresh(); // Refresh values to update the sidebar } catch (error) { setPopup({ message: "Failed to save folder name", type: "error" }); } }; - const deleteFolderHandler = async ( - event: React.MouseEvent - ) => { - event.stopPropagation(); // Prevent the event from bubbling up to the toggle expansion + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); + const deleteConfirmRef = useRef(null); + + const handleDeleteClick = (event: React.MouseEvent) => { + event.stopPropagation(); + setShowDeleteConfirm(true); + }; + + const confirmDelete = async (event: React.MouseEvent) => { + event.stopPropagation(); try { await deleteFolder(folder.folder_id); - router.refresh(); // Refresh values to update the sidebar + router.refresh(); } catch (error) { setPopup({ message: "Failed to delete folder", type: "error" }); + } finally { + setShowDeleteConfirm(false); } }; + const cancelDelete = (event: React.MouseEvent) => { + event.stopPropagation(); + setShowDeleteConfirm(false); + }; + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + deleteConfirmRef.current && + !deleteConfirmRef.current.contains(event.target as Node) + ) { + setShowDeleteConfirm(false); + } + }; + + document.addEventListener("mousedown", handleClickOutside); + return () => { + document.removeEventListener("mousedown", handleClickOutside); + }; + }, []); + + const inputRef = useRef(null); + + useEffect(() => { + if (initiallySelected && inputRef.current) { + inputRef.current.focus(); + } + }, [initiallySelected]); + const handleDrop = async (event: React.DragEvent) => { event.preventDefault(); setIsDragOver(false); @@ -140,7 +183,7 @@ const FolderItem = ({ onMouseLeave={() => setIsHovering(false)} >
    -
    +
    {isExpanded ? ( @@ -153,10 +196,12 @@ const FolderItem = ({
    {isEditing ? ( saveFolderName(true)} className="text-sm px-1 flex-1 min-w-0 -my-px mr-2" /> ) : ( @@ -172,18 +217,52 @@ const FolderItem = ({ >
    -
    - +
    + + +
    + } + popover={ +
    +

    + Are you sure you want to delete{" "} + {folder.folder_name}? All the content inside + this folder will also be deleted. +

    +
    + + +
    +
    + } + side="top" + align="center" + />
    )} + {isEditing && (
    saveFolderName()} className="hover:bg-black/10 p-1 -m-1 rounded" > @@ -220,25 +299,36 @@ export const FolderList = ({ folders, currentChatId, openedFolders, + newFolderId, }: { folders: Folder[]; currentChatId?: number; - openedFolders: { [key: number]: boolean }; + openedFolders?: { [key: number]: boolean }; + newFolderId: number | null; }) => { if (folders.length === 0) { return null; } return ( -
    +
    {folders.map((folder) => ( ))} + {folders.length == 1 && folders[0].chat_sessions.length == 0 && ( +

    + {" "} + Drag a chat into a folder to save for later{" "} +

    + )}
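The delete-confirmation popover above closes on any click outside its ref, via the same document-level `mousedown` effect that ChatInputBar's suggestion menus and ChatInputOption's tooltip register later in this diff. A sketch of how that repeated effect could be expressed once; the hook name is hypothetical and not part of the patch:

```ts
import { RefObject, useEffect } from "react";

// Close-on-outside-click effect shared by FolderItem's delete confirmation,
// ChatInputBar's @/ suggestion menus, and ChatInputOption's tooltip/dropup.
export function useClickOutside<T extends HTMLElement>(
  ref: RefObject<T>,
  onOutsideClick: () => void
) {
  useEffect(() => {
    const handleMouseDown = (event: MouseEvent) => {
      if (ref.current && !ref.current.contains(event.target as Node)) {
        onOutsideClick();
      }
    };
    document.addEventListener("mousedown", handleMouseDown);
    return () => document.removeEventListener("mousedown", handleMouseDown);
  }, [ref, onOutsideClick]);
}
```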
    ); }; diff --git a/web/src/app/chat/folders/FolderManagement.tsx b/web/src/app/chat/folders/FolderManagement.tsx index 1dd87ccd99f..b1d245147ce 100644 --- a/web/src/app/chat/folders/FolderManagement.tsx +++ b/web/src/app/chat/folders/FolderManagement.tsx @@ -13,7 +13,7 @@ export async function createFolder(folderName: string): Promise { throw new Error("Failed to create folder"); } const data = await response.json(); - return data.folder_id; + return data; } // Function to add a chat session to a folder diff --git a/web/src/app/chat/input/ChatInputAssistant.tsx b/web/src/app/chat/input/ChatInputAssistant.tsx new file mode 100644 index 00000000000..d2d062eb2f1 --- /dev/null +++ b/web/src/app/chat/input/ChatInputAssistant.tsx @@ -0,0 +1,52 @@ +"use client"; + +import { Persona } from "@/app/admin/assistants/interfaces"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { Tooltip } from "@/components/tooltip/Tooltip"; +import { ForwardedRef, forwardRef, useState } from "react"; +import { FiX } from "react-icons/fi"; + +interface DocumentSidebarProps { + alternativeAssistant: Persona; + unToggle: () => void; +} + +export const ChatInputAssistant = forwardRef< + HTMLDivElement, + DocumentSidebarProps +>(({ alternativeAssistant, unToggle }, ref: ForwardedRef) => { + const [isHovered, setIsHovered] = useState(false); + + return ( +
    setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + className="flex-none h-10 duration-300 h-10 items-center rounded-lg bg-background-150" + > + {alternativeAssistant.description}

    + } + > +
    + +

    + {alternativeAssistant.name} +

    +
    + +
    +
    +
    +
    + ); +}); + +ChatInputAssistant.displayName = "TempAssistant"; +export default ChatInputAssistant; diff --git a/web/src/app/chat/input/ChatInputBar.tsx b/web/src/app/chat/input/ChatInputBar.tsx index 5664fb76584..b579abefeed 100644 --- a/web/src/app/chat/input/ChatInputBar.tsx +++ b/web/src/app/chat/input/ChatInputBar.tsx @@ -1,61 +1,79 @@ -import React, { - Dispatch, - SetStateAction, - useEffect, - useRef, - useState, -} from "react"; -import { - FiSend, - FiFilter, - FiPlusCircle, - FiCpu, - FiX, - FiPlus, - FiInfo, -} from "react-icons/fi"; -import ChatInputOption from "./ChatInputOption"; -import { FaBrain } from "react-icons/fa"; +import React, { useContext, useEffect, useRef, useState } from "react"; +import { FiPlusCircle, FiPlus, FiInfo, FiX } from "react-icons/fi"; +import { ChatInputOption } from "./ChatInputOption"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { FilterManager, LlmOverrideManager } from "@/lib/hooks"; +import { InputPrompt } from "@/app/admin/prompt-library/interfaces"; +import { + FilterManager, + getDisplayNameForModel, + LlmOverrideManager, +} from "@/lib/hooks"; import { SelectedFilterDisplay } from "./SelectedFilterDisplay"; import { useChatContext } from "@/components/context/ChatContext"; import { getFinalLLM } from "@/lib/llm/utils"; -import { FileDescriptor } from "../interfaces"; -import { InputBarPreview } from "../files/InputBarPreview"; -import { RobotIcon } from "@/components/icons/icons"; -import { Hoverable } from "@/components/Hoverable"; +import { ChatFileType, FileDescriptor } from "../interfaces"; +import { + InputBarPreview, + InputBarPreviewImageProvider, +} from "../files/InputBarPreview"; +import { + AssistantsIconSkeleton, + CpuIconSkeleton, + FileIcon, + SendIcon, + StopGeneratingIcon, +} from "@/components/icons/icons"; +import { IconType } from "react-icons"; +import Popup from "../../../components/popup/Popup"; +import { LlmTab } from "../modal/configuration/LlmTab"; +import { AssistantsTab } from "../modal/configuration/AssistantsTab"; +import { DanswerDocument } from "@/lib/search/interfaces"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; import { Tooltip } from "@/components/tooltip/Tooltip"; +import { Hoverable } from "@/components/Hoverable"; +import { SettingsContext } from "@/components/settings/SettingsProvider"; +import { ChatState } from "../types"; + const MAX_INPUT_HEIGHT = 200; export function ChatInputBar({ - personas, + openModelSettings, + showDocs, + selectedDocuments, message, setMessage, + stopGenerating, onSubmit, - isStreaming, - setIsCancelled, - retrievalDisabled, filterManager, llmOverrideManager, - onSetSelectedAssistant, + chatState, + + // assistants selectedAssistant, + assistantOptions, + setSelectedAssistant, + setAlternativeAssistant, + files, setFiles, handleFileUpload, - setConfigModalActiveTab, textAreaRef, alternativeAssistant, + chatSessionId, + inputPrompts, }: { - onSetSelectedAssistant: (alternativeAssistant: Persona | null) => void; - personas: Persona[]; + openModelSettings: () => void; + chatState: ChatState; + stopGenerating: () => void; + showDocs: () => void; + selectedDocuments: DanswerDocument[]; + assistantOptions: Persona[]; + setAlternativeAssistant: (alternativeAssistant: Persona | null) => void; + setSelectedAssistant: (assistant: Persona) => void; + inputPrompts: InputPrompt[]; message: string; setMessage: (message: string) => void; onSubmit: () => void; - isStreaming: boolean; - setIsCancelled: (value: boolean) => void; - 
retrievalDisabled: boolean; filterManager: FilterManager; llmOverrideManager: LlmOverrideManager; selectedAssistant: Persona; @@ -63,10 +81,9 @@ export function ChatInputBar({ files: FileDescriptor[]; setFiles: (files: FileDescriptor[]) => void; handleFileUpload: (files: File[]) => void; - setConfigModalActiveTab: (tab: string) => void; textAreaRef: React.RefObject; + chatSessionId?: number; }) { - // handle re-sizing of the text area useEffect(() => { const textarea = textAreaRef.current; if (textarea) { @@ -94,28 +111,35 @@ export function ChatInputBar({ } } }; + const settings = useContext(SettingsContext); const { llmProviders } = useChatContext(); const [_, llmName] = getFinalLLM(llmProviders, selectedAssistant, null); const suggestionsRef = useRef(null); const [showSuggestions, setShowSuggestions] = useState(false); + const [showPrompts, setShowPrompts] = useState(false); const interactionsRef = useRef(null); const hideSuggestions = () => { setShowSuggestions(false); - setAssistantIconIndex(0); + setTabbingIconIndex(0); }; - // Update selected persona - const updateCurrentPersona = (persona: Persona) => { - onSetSelectedAssistant(persona.id == selectedAssistant.id ? null : persona); - hideSuggestions(); - setMessage(""); + const hidePrompts = () => { + setTimeout(() => { + setShowPrompts(false); + }, 50); + + setTabbingIconIndex(0); + }; + + const updateInputPrompt = (prompt: InputPrompt) => { + hidePrompts(); + setMessage(`${prompt.content}`); }; - // Click out of assistant suggestions useEffect(() => { const handleClickOutside = (event: MouseEvent) => { if ( @@ -125,6 +149,7 @@ export function ChatInputBar({ !interactionsRef.current.contains(event.target as Node)) ) { hideSuggestions(); + hidePrompts(); } }; document.addEventListener("mousedown", handleClickOutside); @@ -133,27 +158,49 @@ export function ChatInputBar({ }; }, []); - // Complete user input handling - const handleInputChange = (event: React.ChangeEvent) => { - const text = event.target.value; - setMessage(text); + const updatedTaggedAssistant = (assistant: Persona) => { + setAlternativeAssistant( + assistant.id == selectedAssistant.id ? 
null : assistant + ); + hideSuggestions(); + setMessage(""); + }; + const handleAssistantInput = (text: string) => { if (!text.startsWith("@")) { hideSuggestions(); - return; + } else { + const match = text.match(/(?:\s|^)@(\w*)$/); + if (match) { + setShowSuggestions(true); + } else { + hideSuggestions(); + } } + }; - // If looking for an assistant...fup - const match = text.match(/(?:\s|^)@(\w*)$/); - if (match) { - setShowSuggestions(true); + const handlePromptInput = (text: string) => { + if (!text.startsWith("/")) { + hidePrompts(); } else { - hideSuggestions(); + const promptMatch = text.match(/(?:\s|^)\/(\w*)$/); + if (promptMatch) { + setShowPrompts(true); + } else { + hidePrompts(); + } } }; - const filteredPersonas = personas.filter((persona) => - persona.name.toLowerCase().startsWith( + const handleInputChange = (event: React.ChangeEvent) => { + const text = event.target.value; + setMessage(text); + handleAssistantInput(text); + handlePromptInput(text); + }; + + const assistantTagOptions = assistantOptions.filter((assistant) => + assistant.name.toLowerCase().startsWith( message .slice(message.lastIndexOf("@") + 1) .split(/\s/)[0] @@ -161,78 +208,111 @@ export function ChatInputBar({ ) ); - const [assistantIconIndex, setAssistantIconIndex] = useState(0); + const filteredPrompts = inputPrompts.filter( + (prompt) => + prompt.active && + prompt.prompt.toLowerCase().startsWith( + message + .slice(message.lastIndexOf("/") + 1) + .split(/\s/)[0] + .toLowerCase() + ) + ); + + const [tabbingIconIndex, setTabbingIconIndex] = useState(0); const handleKeyDown = (e: React.KeyboardEvent) => { if ( - showSuggestions && - filteredPersonas.length > 0 && + ((showSuggestions && assistantTagOptions.length > 0) || showPrompts) && (e.key === "Tab" || e.key == "Enter") ) { e.preventDefault(); - if (assistantIconIndex == filteredPersonas.length) { - window.open("/assistants/new", "_blank"); - hideSuggestions(); - setMessage(""); + + if ( + (tabbingIconIndex == assistantTagOptions.length && showSuggestions) || + (tabbingIconIndex == filteredPrompts.length && showPrompts) + ) { + if (showPrompts) { + window.open("/prompts", "_self"); + } else { + window.open("/assistants/new", "_self"); + } } else { - const option = - filteredPersonas[assistantIconIndex >= 0 ? assistantIconIndex : 0]; - updateCurrentPersona(option); + if (showPrompts) { + const uppity = + filteredPrompts[tabbingIconIndex >= 0 ? tabbingIconIndex : 0]; + updateInputPrompt(uppity); + } else { + const option = + assistantTagOptions[tabbingIconIndex >= 0 ? tabbingIconIndex : 0]; + + updatedTaggedAssistant(option); + } } - } else if (e.key === "ArrowDown") { + } + if (!showPrompts && !showSuggestions) { + return; + } + + if (e.key === "ArrowDown") { e.preventDefault(); - setAssistantIconIndex((assistantIconIndex) => - Math.min(assistantIconIndex + 1, filteredPersonas.length) + + setTabbingIconIndex((tabbingIconIndex) => + Math.min( + tabbingIconIndex + 1, + showPrompts ? filteredPrompts.length : assistantTagOptions.length + ) ); } else if (e.key === "ArrowUp") { e.preventDefault(); - setAssistantIconIndex((assistantIconIndex) => - Math.max(assistantIconIndex - 1, 0) + setTabbingIconIndex((tabbingIconIndex) => + Math.max(tabbingIconIndex - 1, 0) ); } }; return ( -
    -
    +
    +
    - {showSuggestions && filteredPersonas.length > 0 && ( + {showSuggestions && assistantTagOptions.length > 0 && (
    -
    - {filteredPersonas.map((currentPersona, index) => ( +
    + {assistantTagOptions.map((currentAssistant, index) => ( ))} + @@ -242,22 +322,60 @@ export function ChatInputBar({
    )} + {showPrompts && ( + + )} +
    -
    - +

    {alternativeAssistant.name}

    -
    +
    @@ -287,30 +405,56 @@ export function ChatInputBar({ onSetSelectedAssistant(null)} + onClick={() => setAlternativeAssistant(null)} />
    )} - - {files.length > 0 && ( -
    - {files.map((file) => ( -
    - { - setFiles( - files.filter( - (fileInFilter) => fileInFilter.id !== file.id - ) - ); - }} - isUploading={file.isUploading || false} - /> -
    - ))} + {(selectedDocuments.length > 0 || files.length > 0) && ( +
    +
    + {selectedDocuments.length > 0 && ( + + )} + {files.map((file) => ( +
    + {file.type === ChatFileType.IMAGE ? ( + { + setFiles( + files.filter( + (fileInFilter) => fileInFilter.id !== file.id + ) + ); + }} + isUploading={file.isUploading || false} + /> + ) : ( + { + setFiles( + files.filter( + (fileInFilter) => fileInFilter.id !== file.id + ) + ); + }} + isUploading={file.isUploading || false} + /> + )} +
    + ))} +
    )} @@ -324,24 +468,22 @@ export function ChatInputBar({ w-full shrink resize-none + rounded-lg border-0 - bg-background-weak + bg-background-100 ${ textAreaRef.current && textAreaRef.current.scrollHeight > MAX_INPUT_HEIGHT ? "overflow-y-auto mt-2" : "" } - overflow-hidden whitespace-normal break-word overscroll-contain outline-none placeholder-subtle - overflow-hidden resize-none - pl-4 - pr-12 + px-5 py-4 h-14 `} @@ -349,54 +491,97 @@ export function ChatInputBar({ style={{ scrollbarWidth: "thin" }} role="textarea" aria-multiline - placeholder="Send a message..." + placeholder={`Send a message ${ + !settings?.isMobile ? "or try using @ or /" : "" + }`} value={message} onKeyDown={(event) => { if ( event.key === "Enter" && + !showPrompts && + !showSuggestions && !event.shiftKey && - message && - !isStreaming + !(event.nativeEvent as any).isComposing ) { - onSubmit(); event.preventDefault(); + if (message) { + onSubmit(); + } } }} suppressContentEditableWarning={true} /> -
    - + ( + { + setSelectedAssistant(assistant); + close(); + }} + /> + )} flexPriority="shrink" - name={selectedAssistant ? selectedAssistant.name : "Assistants"} - icon={FaBrain} - onClick={() => setConfigModalActiveTab("assistants")} - /> - - setConfigModalActiveTab("llms")} - /> - - {!retrievalDisabled && ( + position="top" + mobilePosition="top-right" + > setConfigModalActiveTab("filters")} + toggle + flexPriority="shrink" + name={ + selectedAssistant ? selectedAssistant.name : "Assistants" + } + Icon={AssistantsIconSkeleton as IconType} /> - )} + + ( + + )} + position="top" + > + + { const input = document.createElement("input"); input.type = "file"; @@ -413,26 +598,38 @@ export function ChatInputBar({ }} />
    -
    -
    { - if (!isStreaming) { + +
    + {chatState == "streaming" || + chatState == "toolBuilding" || + chatState == "loading" ? ( + + ) : ( +
    + }} + disabled={chatState != "input"} + > + + + )}
    diff --git a/web/src/app/chat/input/ChatInputOption.tsx b/web/src/app/chat/input/ChatInputOption.tsx index 0d221116461..d2d7bc5fde9 100644 --- a/web/src/app/chat/input/ChatInputOption.tsx +++ b/web/src/app/chat/input/ChatInputOption.tsx @@ -1,110 +1,103 @@ -import React, { useState } from "react"; -import { IconType } from "react-icons"; -import { DefaultDropdownElement } from "../../../components/Dropdown"; -import { Popover } from "../../../components/popover/Popover"; +import React, { useState, useRef, useEffect } from "react"; +import { + ChevronDownIcon, + ChevronRightIcon, + IconProps, +} from "@/components/icons/icons"; interface ChatInputOptionProps { - name: string; - icon: IconType; - onClick: () => void; + name?: string; + Icon: ({ size, className }: IconProps) => JSX.Element; + onClick?: () => void; size?: number; - - options?: { name: string; value: number; onClick?: () => void }[]; + tooltipContent?: React.ReactNode; flexPriority?: "shrink" | "stiff" | "second"; + toggle?: boolean; } -const ChatInputOption = ({ +export const ChatInputOption: React.FC = ({ name, - icon: Icon, - onClick, + Icon, + // icon: Icon, size = 16, - options, flexPriority, -}: ChatInputOptionProps) => { + tooltipContent, + toggle, + onClick, +}) => { const [isDropupVisible, setDropupVisible] = useState(false); + const [isTooltipVisible, setIsTooltipVisible] = useState(false); + const componentRef = useRef(null); + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + componentRef.current && + !componentRef.current.contains(event.target as Node) + ) { + setIsTooltipVisible(false); + setDropupVisible(false); + } + }; - const handleClick = () => { - setDropupVisible(!isDropupVisible); - // onClick(); - }; + document.addEventListener("mousedown", handleClickOutside); + return () => { + document.removeEventListener("mousedown", handleClickOutside); + }; + }, []); - const dropdownContent = options ? ( + return (
    - {options.map((option) => ( - { - if (option.onClick) { - option.onClick(); - setDropupVisible(false); - } - }} - isSelected={false} - /> - ))} -
    - ) : null; - - const option = ( -
    -
    - - {name} + +
    + {name && {name}} + {toggle && ( + + )}
    -
    - ); - - if (!dropdownContent) { - return ( -
    - {option} -
    - ); - } - return ( - + {isTooltipVisible && tooltipContent && ( +
    + {tooltipContent} +
    + )} +
    ); }; - -export default ChatInputOption; diff --git a/web/src/app/chat/interfaces.ts b/web/src/app/chat/interfaces.ts index 902f5b86553..b4ba2e97475 100644 --- a/web/src/app/chat/interfaces.ts +++ b/web/src/app/chat/interfaces.ts @@ -1,4 +1,8 @@ -import { DanswerDocument, Filters } from "@/lib/search/interfaces"; +import { + DanswerDocument, + Filters, + SearchDanswerDocument, +} from "@/lib/search/interfaces"; export enum RetrievalType { None = "none", @@ -30,10 +34,15 @@ export interface FileDescriptor { id: string; type: ChatFileType; name?: string | null; + // FE only isUploading?: boolean; } +export interface LLMRelevanceFilterPacket { + relevant_chunk_indices: number[]; +} + export interface ToolCallMetadata { tool_name: string; tool_args: Record; @@ -56,6 +65,13 @@ export interface ChatSession { current_alternate_model: string; } +export interface SearchSession { + search_session_id: number; + documents: SearchDanswerDocument[]; + messages: BackendMessage[]; + description: string; +} + export interface Message { messageId: number; message: string; @@ -71,6 +87,8 @@ export interface Message { childrenMessageIds?: number[]; latestChildMessageId?: number | null; alternateAssistantID?: number | null; + stackTrace?: string | null; + overridden_model?: string; } export interface BackendChatSession { @@ -86,6 +104,8 @@ export interface BackendChatSession { export interface BackendMessage { message_id: number; + comments: any; + chat_session_id: number; parent_message: number | null; latest_child_message: number | null; message: string; @@ -97,6 +117,12 @@ export interface BackendMessage { files: FileDescriptor[]; tool_calls: ToolCallFinalResult[]; alternate_assistant_id?: number | null; + overridden_model?: string; +} + +export interface MessageResponseIDInfo { + user_message_id: number | null; + reserved_assistant_message_id: number; } export interface DocumentsResponse { @@ -110,4 +136,5 @@ export interface ImageGenerationDisplay { export interface StreamingError { error: string; + stack_trace: string; } diff --git a/web/src/app/chat/lib.tsx b/web/src/app/chat/lib.tsx index c666914c26f..b17b94b7ec7 100644 --- a/web/src/app/chat/lib.tsx +++ b/web/src/app/chat/lib.tsx @@ -3,8 +3,8 @@ import { DanswerDocument, Filters, } from "@/lib/search/interfaces"; -import { handleStream } from "@/lib/search/streamingUtils"; -import { FeedbackType } from "./types"; +import { handleSSEStream, handleStream } from "@/lib/search/streamingUtils"; +import { ChatState, FeedbackType } from "./types"; import { Dispatch, MutableRefObject, @@ -20,6 +20,7 @@ import { FileDescriptor, ImageGenerationDisplay, Message, + MessageResponseIDInfo, RetrievalType, StreamingError, ToolCallMetadata, @@ -27,6 +28,37 @@ import { import { Persona } from "../admin/assistants/interfaces"; import { ReadonlyURLSearchParams } from "next/navigation"; import { SEARCH_PARAM_NAMES } from "./searchParams"; +import { Settings } from "../admin/settings/interfaces"; + +interface ChatRetentionInfo { + chatRetentionDays: number; + daysFromCreation: number; + daysUntilExpiration: number; + showRetentionWarning: boolean; +} + +export function getChatRetentionInfo( + chatSession: ChatSession, + settings: Settings +): ChatRetentionInfo { + // If `maximum_chat_retention_days` isn't set- never display retention warning. 
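// Worked example of the thresholds computed below (hypothetical values):
// with maximum_chat_retention_days = 30 and a chat created 25 days ago,
// daysFromCreation = 25 and daysUntilExpiration = 5, so the warning shows
// (5 < 7). With a retention window shorter than 7 days, the warning only
// shows once daysUntilExpiration drops below 2. When the setting is unset,
// the 10000-day fallback effectively disables the warning.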
+ const chatRetentionDays = settings.maximum_chat_retention_days || 10000; + const createdDate = new Date(chatSession.time_created); + const today = new Date(); + const daysFromCreation = Math.ceil( + (today.getTime() - createdDate.getTime()) / (1000 * 3600 * 24) + ); + const daysUntilExpiration = chatRetentionDays - daysFromCreation; + const showRetentionWarning = + chatRetentionDays < 7 ? daysUntilExpiration < 2 : daysUntilExpiration < 7; + + return { + chatRetentionDays, + daysFromCreation, + daysUntilExpiration, + showRetentionWarning, + }; +} export async function updateModelOverrideForChatSession( chatSessionId: number, @@ -78,9 +110,11 @@ export type PacketType = | AnswerPiecePacket | DocumentsResponse | ImageGenerationDisplay - | StreamingError; + | StreamingError + | MessageResponseIDInfo; export async function* sendMessage({ + regenerate, message, fileDescriptors, parentMessageId, @@ -96,7 +130,9 @@ export async function* sendMessage({ systemPromptOverride, useExistingUserMessage, alternateAssistantId, + signal, }: { + regenerate: boolean; message: string; fileDescriptors: FileDescriptor[]; parentMessageId: number | null; @@ -106,70 +142,70 @@ export async function* sendMessage({ selectedDocumentIds: number[] | null; queryOverride?: string; forceSearch?: boolean; - // LLM overrides modelProvider?: string; modelVersion?: string; temperature?: number; - // prompt overrides systemPromptOverride?: string; - // if specified, will use the existing latest user message - // and will ignore the specified `message` useExistingUserMessage?: boolean; alternateAssistantId?: number; -}) { + signal?: AbortSignal; +}): AsyncGenerator { const documentsAreSelected = selectedDocumentIds && selectedDocumentIds.length > 0; - const sendMessageResponse = await fetch("/api/chat/send-message", { + const body = JSON.stringify({ + alternate_assistant_id: alternateAssistantId, + chat_session_id: chatSessionId, + parent_message_id: parentMessageId, + message: message, + prompt_id: promptId, + search_doc_ids: documentsAreSelected ? selectedDocumentIds : null, + file_descriptors: fileDescriptors, + regenerate, + retrieval_options: !documentsAreSelected + ? { + run_search: + promptId === null || + promptId === undefined || + queryOverride || + forceSearch + ? "always" + : "auto", + real_time: true, + filters: filters, + } + : null, + query_override: queryOverride, + prompt_override: systemPromptOverride + ? { + system_prompt: systemPromptOverride, + } + : null, + llm_override: + temperature || modelVersion + ? { + temperature, + model_provider: modelProvider, + model_version: modelVersion, + } + : null, + use_existing_user_message: useExistingUserMessage, + }); + + const response = await fetch(`/api/chat/send-message`, { method: "POST", headers: { "Content-Type": "application/json", }, - body: JSON.stringify({ - alternate_assistant_id: alternateAssistantId, - chat_session_id: chatSessionId, - parent_message_id: parentMessageId, - message: message, - prompt_id: promptId, - search_doc_ids: documentsAreSelected ? selectedDocumentIds : null, - file_descriptors: fileDescriptors, - retrieval_options: !documentsAreSelected - ? { - run_search: - promptId === null || - promptId === undefined || - queryOverride || - forceSearch - ? "always" - : "auto", - real_time: true, - filters: filters, - } - : null, - query_override: queryOverride, - prompt_override: systemPromptOverride - ? { - system_prompt: systemPromptOverride, - } - : null, - llm_override: - temperature || modelVersion - ? 
{ - temperature, - model_provider: modelProvider, - model_version: modelVersion, - } - : null, - use_existing_user_message: useExistingUserMessage, - }), + body, + signal, }); - if (!sendMessageResponse.ok) { - const errorJson = await sendMessageResponse.json(); - const errorMsg = errorJson.message || errorJson.detail || ""; - throw Error(`Failed to send message - ${errorMsg}`); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); } - yield* handleStream(sendMessageResponse); + yield* handleSSEStream(response); } export async function nameChatSession(chatSessionId: number, message: string) { @@ -353,13 +389,12 @@ export function getLastSuccessfulMessageId(messageHistory: Message[]) { .reverse() .find( (message) => - message.type === "assistant" && + (message.type === "assistant" || message.type === "system") && message.messageId !== -1 && message.messageId !== null ); return lastSuccessfulMessage ? lastSuccessfulMessage?.messageId : null; } - export function processRawChatHistory( rawMessages: BackendMessage[] ): Map { @@ -402,6 +437,7 @@ export function processRawChatHistory( parentMessageId: messageInfo.parent_message, childrenMessageIds: [], latestChildMessageId: messageInfo.latest_child_message, + overridden_model: messageInfo.overridden_model, }; messages.set(messageInfo.message_id, message); @@ -505,7 +541,7 @@ export function removeMessage( export function checkAnyAssistantHasSearch( messageHistory: Message[], - availablePersonas: Persona[], + availableAssistants: Persona[], livePersona: Persona ): boolean { const response = @@ -516,11 +552,9 @@ export function checkAnyAssistantHasSearch( ) { return false; } - - const alternateAssistant = availablePersonas.find( - (persona) => persona.id === message.alternateAssistantID + const alternateAssistant = availableAssistants.find( + (assistant) => assistant.id === message.alternateAssistantID ); - return alternateAssistant ? personaIncludesRetrieval(alternateAssistant) : false; @@ -537,6 +571,13 @@ export function personaIncludesRetrieval(selectedPersona: Persona) { ); } +export function personaIncludesImage(selectedPersona: Persona) { + return selectedPersona.tools.some( + (tool) => + tool.in_code_tool_id && tool.in_code_tool_id == "ImageGenerationTool" + ); +} + const PARAMS_TO_SKIP = [ SEARCH_PARAM_NAMES.SUBMIT_ON_LOAD, SEARCH_PARAM_NAMES.USER_MESSAGE, @@ -549,11 +590,16 @@ const PARAMS_TO_SKIP = [ export function buildChatUrl( existingSearchParams: ReadonlyURLSearchParams, chatSessionId: number | null, - personaId: number | null + personaId: number | null, + search?: boolean ) { const finalSearchParams: string[] = []; if (chatSessionId) { - finalSearchParams.push(`${SEARCH_PARAM_NAMES.CHAT_ID}=${chatSessionId}`); + finalSearchParams.push( + `${ + search ? SEARCH_PARAM_NAMES.SEARCH_ID : SEARCH_PARAM_NAMES.CHAT_ID + }=${chatSessionId}` + ); } if (personaId !== null) { finalSearchParams.push(`${SEARCH_PARAM_NAMES.PERSONA_ID}=${personaId}`); @@ -567,10 +613,10 @@ export function buildChatUrl( const finalSearchParamsString = finalSearchParams.join("&"); if (finalSearchParamsString) { - return `/chat?${finalSearchParamsString}`; + return `/${search ? "search" : "chat"}?${finalSearchParamsString}`; } - return "/chat"; + return `/${search ? 
"search" : "chat"}`; } export async function uploadFilesForChat( @@ -594,19 +640,20 @@ export async function uploadFilesForChat( } export async function useScrollonStream({ - isStreaming, + chatState, scrollableDivRef, scrollDist, endDivRef, distance, debounce, }: { - isStreaming: boolean; + chatState: ChatState; scrollableDivRef: RefObject; scrollDist: MutableRefObject; endDivRef: RefObject; distance: number; debounce: number; + mobile?: boolean; }) { const preventScrollInterference = useRef(false); const preventScroll = useRef(false); @@ -614,7 +661,7 @@ export async function useScrollonStream({ const previousScroll = useRef(0); useEffect(() => { - if (isStreaming && scrollableDivRef && scrollableDivRef.current) { + if (chatState != "input" && scrollableDivRef && scrollableDivRef.current) { let newHeight: number = scrollableDivRef.current?.scrollTop!; const heightDifference = newHeight - previousScroll.current; previousScroll.current = newHeight; @@ -670,8 +717,8 @@ export async function useScrollonStream({ // scroll on end of stream if within distance useEffect(() => { - if (scrollableDivRef?.current && !isStreaming) { - if (scrollDist.current < distance) { + if (scrollableDivRef?.current && chatState == "input") { + if (scrollDist.current < distance - 50) { scrollableDivRef?.current?.scrollBy({ left: 0, top: Math.max(scrollDist.current + 600, 0), @@ -679,5 +726,5 @@ export async function useScrollonStream({ }); } } - }, [isStreaming]); + }, [chatState]); } diff --git a/web/src/app/chat/message/CodeBlock.tsx b/web/src/app/chat/message/CodeBlock.tsx index b9866452bb3..7da83195b43 100644 --- a/web/src/app/chat/message/CodeBlock.tsx +++ b/web/src/app/chat/message/CodeBlock.tsx @@ -64,9 +64,15 @@ export function CodeBlock({ codeLines.pop(); // Remove the last line with the trailing backticks } - // remove leading whitespace from each line for nicer copy/paste experience - const trimmedCodeLines = codeLines.map((line) => line.trimStart()); - codeText = trimmedCodeLines.join("\n"); + const minIndent = codeLines + .filter((line) => line.trim().length > 0) + .reduce((min, line) => { + const match = line.match(/^\s*/); + return Math.min(min, match ? match[0].length : 0); + }, Infinity); + + const formattedCodeLines = codeLines.map((line) => line.slice(minIndent)); + codeText = formattedCodeLines.join("\n"); } } @@ -91,7 +97,8 @@ export function CodeBlock({ codeText = findTextNode(props.node); } - const handleCopy = () => { + const handleCopy = (event: React.MouseEvent) => { + event.preventDefault(); if (!codeText) { return; } @@ -109,7 +116,7 @@ export function CodeBlock({ {codeText && (
    {copied ? (
    @@ -126,9 +133,7 @@ export function CodeBlock({ )}
    -        
    -          {children}
    -        
    +        {children}
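The copy handler in this CodeBlock.tsx hunk replaces per-line `trimStart` with a common-indent strip, so copied code keeps its relative nesting. A minimal standalone sketch of that dedent step, assuming it is fed the same `codeLines` array; the helper name and the `Infinity` guard are additions for illustration:

```ts
// Strip only the smallest shared leading indentation so relative nesting
// survives copy/paste (mirrors the minIndent logic added above).
function dedentForCopy(codeLines: string[]): string {
  const minIndent = codeLines
    .filter((line) => line.trim().length > 0)
    .reduce((min, line) => {
      const match = line.match(/^\s*/);
      return Math.min(min, match ? match[0].length : 0);
    }, Infinity);

  // Guard added for illustration: if every line is blank, minIndent stays
  // Infinity and slice(Infinity) would blank the lines entirely.
  const indent = Number.isFinite(minIndent) ? minIndent : 0;
  return codeLines.map((line) => line.slice(indent)).join("\n");
}

// dedentForCopy(["    if (x) {", "      y();", "    }"])
// => "if (x) {\n  y();\n}"
```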
           
    ); diff --git a/web/src/app/chat/message/Messages.tsx b/web/src/app/chat/message/Messages.tsx index a28488d8cc4..09cacd1b9f1 100644 --- a/web/src/app/chat/message/Messages.tsx +++ b/web/src/app/chat/message/Messages.tsx @@ -1,11 +1,6 @@ "use client"; import { - FiCpu, - FiImage, - FiThumbsDown, - FiThumbsUp, - FiUser, FiEdit2, FiChevronRight, FiChevronLeft, @@ -13,12 +8,21 @@ import { FiGlobe, } from "react-icons/fi"; import { FeedbackType } from "../types"; -import { useEffect, useRef, useState } from "react"; +import { + Dispatch, + SetStateAction, + useContext, + useEffect, + useRef, + useState, +} from "react"; import ReactMarkdown from "react-markdown"; -import { DanswerDocument } from "@/lib/search/interfaces"; -import { SearchSummary, ShowHideDocsButton } from "./SearchSummary"; +import { + DanswerDocument, + FilteredDanswerDocument, +} from "@/lib/search/interfaces"; +import { SearchSummary } from "./SearchSummary"; import { SourceIcon } from "@/components/SourceIcon"; -import { ThreeDots } from "react-loader-spinner"; import { SkippedSearch } from "./SkippedSearch"; import remarkGfm from "remark-gfm"; import { CopyButton } from "@/components/CopyButton"; @@ -29,20 +33,38 @@ import { INTERNET_SEARCH_TOOL_NAME, } from "../tools/constants"; import { ToolRunDisplay } from "../tools/ToolRunningAnimation"; -import { Hoverable } from "@/components/Hoverable"; +import { Hoverable, HoverableIcon } from "@/components/Hoverable"; import { DocumentPreview } from "../files/documents/DocumentPreview"; import { InMessageImage } from "../files/images/InMessageImage"; import { CodeBlock } from "./CodeBlock"; import rehypePrism from "rehype-prism-plus"; -// Prism stuff -import Prism from "prismjs"; - import "prismjs/themes/prism-tomorrow.css"; import "./custom-code-styles.css"; import { Persona } from "@/app/admin/assistants/interfaces"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { Citation } from "@/components/search/results/Citation"; +import { DocumentMetadataBlock } from "@/components/search/DocumentDisplay"; + +import { + ThumbsUpIcon, + ThumbsDownIcon, + LikeFeedback, + DislikeFeedback, +} from "@/components/icons/icons"; +import { + CustomTooltip, + TooltipGroup, +} from "@/components/tooltip/CustomTooltip"; +import { ValidSources } from "@/lib/types"; +import { Tooltip } from "@/components/tooltip/Tooltip"; +import { useMouseTracking } from "./hooks"; import { InternetSearchIcon } from "@/components/InternetSearchIcon"; +import { SettingsContext } from "@/components/settings/SettingsProvider"; +import GeneratingImageDisplay from "../tools/GeneratingImageDisplay"; +import RegenerateOption from "../RegenerateOption"; +import { LlmOverride } from "@/lib/hooks"; +import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal"; const TOOLS_WITH_CUSTOM_HANDLING = [ SEARCH_TOOL_NAME, @@ -50,14 +72,23 @@ const TOOLS_WITH_CUSTOM_HANDLING = [ IMAGE_GENERATION_TOOL_NAME, ]; -function FileDisplay({ files }: { files: FileDescriptor[] }) { +function FileDisplay({ + files, + alignBubble, +}: { + files: FileDescriptor[]; + alignBubble?: boolean; +}) { const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE); const nonImgFiles = files.filter((file) => file.type !== ChatFileType.IMAGE); return ( <> {nonImgFiles && nonImgFiles.length > 0 && ( -
    +
    {nonImgFiles.map((file) => { return ( @@ -65,6 +96,7 @@ function FileDisplay({ files }: { files: FileDescriptor[] }) {
    ); @@ -73,8 +105,11 @@ function FileDisplay({ files }: { files: FileDescriptor[] }) {
    )} {imageFiles && imageFiles.length > 0 && ( -
    -
    +
    +
    {imageFiles.map((file) => { return ; })} @@ -86,10 +121,17 @@ function FileDisplay({ files }: { files: FileDescriptor[] }) { } export const AIMessage = ({ + regenerate, + overriddenModel, + shared, + isActive, + toggleDocumentSelection, alternativeAssistant, + docs, messageId, content, files, + selectedDocuments, query, personaName, citedDocuments, @@ -103,7 +145,16 @@ export const AIMessage = ({ handleForceSearch, retrievalDisabled, currentPersona, + otherMessagesCanSwitchTo, + onMessageSelection, }: { + shared?: boolean; + isActive?: boolean; + otherMessagesCanSwitchTo?: number[]; + onMessageSelection?: (messageId: number) => void; + selectedDocuments?: DanswerDocument[] | null; + toggleDocumentSelection?: () => void; + docs?: DanswerDocument[] | null; alternativeAssistant?: Persona | null; currentPersona: Persona; messageId: number | null; @@ -121,17 +172,49 @@ export const AIMessage = ({ handleSearchQueryEdit?: (query: string) => void; handleForceSearch?: () => void; retrievalDisabled?: boolean; + overriddenModel?: string; + regenerate?: (modelOverRide: LlmOverride) => Promise; }) => { - const [isReady, setIsReady] = useState(false); - useEffect(() => { - Prism.highlightAll(); - setIsReady(true); - }, []); + const toolCallGenerating = toolCall && !toolCall.tool_result; + const processContent = (content: string | JSX.Element) => { + if (typeof content !== "string") { + return content; + } + const codeBlockRegex = /```(\w*)\n[\s\S]*?```|```[\s\S]*?$/g; + const matches = content.match(codeBlockRegex); + + if (matches) { + content = matches.reduce((acc, match) => { + if (!match.match(/```\w+/)) { + return acc.replace(match, match.replace("```", "```plaintext")); + } + return acc; + }, content); + + const lastMatch = matches[matches.length - 1]; + if (!lastMatch.endsWith("```")) { + return content; + } + } + + return content + (!isComplete && !toolCallGenerating ? " [*]() " : ""); + }; + const finalContent = processContent(content as string); + + const [isRegenerateHovered, setIsRegenerateHovered] = useState(false); + const { isHovering, trackedElementRef, hoverElementRef } = useMouseTracking(); + + const settings = useContext(SettingsContext); // this is needed to give Prism a chance to load - if (!isReady) { - return
    ; - } + + const selectedDocumentIds = + selectedDocuments?.map((document) => document.document_id) || []; + let citedDocumentIds: string[] = []; + + citedDocuments?.forEach((doc) => { + citedDocumentIds.push(doc[1].document_id); + }); if (!isComplete) { const trimIncompleteCodeSection = ( @@ -148,249 +231,407 @@ export const AIMessage = ({ } return content; }; - content = trimIncompleteCodeSection(content); } - const danswerSearchToolEnabledForPersona = currentPersona.tools.some( - (tool) => tool.in_code_tool_id === SEARCH_TOOL_NAME - ); - const shouldShowLoader = - !toolCall || (toolCall.tool_name === SEARCH_TOOL_NAME && !content); - const defaultLoader = shouldShowLoader ? ( -
    - -
    - ) : undefined; + let filteredDocs: FilteredDanswerDocument[] = []; + + if (docs) { + filteredDocs = docs + .filter( + (doc, index, self) => + doc.document_id && + doc.document_id !== "" && + index === self.findIndex((d) => d.document_id === doc.document_id) + ) + .filter((doc) => { + return citedDocumentIds.includes(doc.document_id); + }) + .map((doc: DanswerDocument, ind: number) => { + return { + ...doc, + included: selectedDocumentIds.includes(doc.document_id), + }; + }); + } + + const currentMessageInd = messageId + ? otherMessagesCanSwitchTo?.indexOf(messageId) + : undefined; + const uniqueSources: ValidSources[] = Array.from( + new Set((docs || []).map((doc) => doc.source_type)) + ).slice(0, 3); + + const includeMessageSwitcher = + currentMessageInd !== undefined && + onMessageSelection && + otherMessagesCanSwitchTo && + otherMessagesCanSwitchTo.length > 1; return ( -
    -
    -
    +
    +
    +
    +
    +
    +
    +
    + {(!toolCall || toolCall.tool_name === SEARCH_TOOL_NAME) && ( + <> + {query !== undefined && + handleShowRetrieved !== undefined && + !retrievalDisabled && ( +
    + +
    + )} + {handleForceSearch && + content && + query === undefined && + !hasDocs && + !retrievalDisabled && ( +
    + +
    + )} + + )} + {toolCall && + !TOOLS_WITH_CUSTOM_HANDLING.includes( + toolCall.tool_name + ) && ( + + } + isRunning={!toolCall.tool_result || !content} + /> + )} -
    - {alternativeAssistant - ? alternativeAssistant.name - : personaName || "Danswer"} -
    - - {query === undefined && - hasDocs && - handleShowRetrieved !== undefined && - isCurrentlyShowingRetrieved !== undefined && - !retrievalDisabled && ( -
    -
    - -
    -
    - )} -
    + {toolCall && + (!files || files.length == 0) && + toolCall.tool_name === IMAGE_GENERATION_TOOL_NAME && + !toolCall.tool_result && } -
    - {(!toolCall || toolCall.tool_name === SEARCH_TOOL_NAME) && - danswerSearchToolEnabledForPersona && ( - <> - {query !== undefined && - handleShowRetrieved !== undefined && - isCurrentlyShowingRetrieved !== undefined && - !retrievalDisabled && ( -
    - } - handleShowRetrieved={handleShowRetrieved} - handleSearchQueryEdit={handleSearchQueryEdit} + isRunning={!toolCall.tool_result} /> -
    + )} + + {content || files ? ( + <> + + + {typeof content === "string" ? ( +
    + { + const { node, ...rest } = props; + const value = rest.children; + + if (value?.toString().startsWith("*")) { + return ( +
    + ); + } else if ( + value?.toString().startsWith("[") + ) { + // for some reason tags cause the onClick to not apply + // and the links are unclickable + // TODO: fix the fact that you have to double click to follow link + // for the first link + return ( + + {rest.children} + + ); + } else { + return ( + + rest.href + ? window.open(rest.href, "_blank") + : undefined + } + className="cursor-pointer text-link hover:text-link-hover" + > + {rest.children} + + ); + } + }, + code: (props) => ( + + ), + p: ({ node, ...props }) => ( +

    + ), + }} + remarkPlugins={[remarkGfm]} + rehypePlugins={[ + [rehypePrism, { ignoreMissing: true }], + ]} + > + {finalContent as string} + +

    + ) : ( + content + )} + + ) : isComplete ? null : ( + <> )} - {handleForceSearch && - content && - query === undefined && - !hasDocs && - !retrievalDisabled && ( -
    - + {isComplete && docs && docs.length > 0 && ( +
    +
    +
    + {!settings?.isMobile && + filteredDocs.length > 0 && + filteredDocs.slice(0, 2).map((doc, ind) => ( + + ))} +
    { + if (toggleDocumentSelection) { + toggleDocumentSelection(); + } + }} + key={-1} + className="cursor-pointer w-[200px] rounded-lg flex-none transition-all duration-500 hover:bg-background-125 bg-text-100 px-4 py-2 border-b" + > +
    +

    See context

    +
    + {uniqueSources.map((sourceType, ind) => { + return ( +
    + +
    + ); + })} +
    +
    +
    + See more +
    +
    +
    +
    )} - - )} - - {toolCall && - !TOOLS_WITH_CUSTOM_HANDLING.includes(toolCall.tool_name) && ( -
    - } - isRunning={!toolCall.tool_result || !content} - /> -
    - )} - - {toolCall && - toolCall.tool_name === IMAGE_GENERATION_TOOL_NAME && - !toolCall.tool_result && ( -
    - } - isRunning={!toolCall.tool_result} - /> -
    - )} +
    - {toolCall && toolCall.tool_name === INTERNET_SEARCH_TOOL_NAME && ( -
    - } - isRunning={!toolCall.tool_result} - /> -
    - )} - - {content ? ( - <> - - - {typeof content === "string" ? ( - { - const { node, ...rest } = props; - // for some reason tags cause the onClick to not apply - // and the links are unclickable - // TODO: fix the fact that you have to double click to follow link - // for the first link - return ( - - rest.href - ? window.open(rest.href, "_blank") - : undefined - } - className="cursor-pointer text-link hover:text-link-hover" - // href={rest.href} - // target="_blank" - // rel="noopener noreferrer" - > - {rest.children} - - ); - }, - code: (props) => ( - - ), - p: ({ node, ...props }) => ( -

    - ), - }} - remarkPlugins={[remarkGfm]} - rehypePlugins={[[rehypePrism, { ignoreMissing: true }]]} - > - {content} - - ) : ( - content - )} - - ) : isComplete ? null : ( - defaultLoader - )} - {citedDocuments && citedDocuments.length > 0 && ( -

    - Sources: -
    - {citedDocuments - .filter(([_, document]) => document.semantic_identifier) - .map(([citationKey, document], ind) => { - const display = ( -
    -
    - {document.is_internet ? ( - - ) : ( - + {handleFeedback && + (isActive ? ( +
    + +
    + {includeMessageSwitcher && ( +
    + { + onMessageSelection( + otherMessagesCanSwitchTo[ + currentMessageInd - 1 + ] + ); + }} + handleNext={() => { + onMessageSelection( + otherMessagesCanSwitchTo[ + currentMessageInd + 1 + ] + ); + }} + /> +
    )}
    - [{citationKey}] {document!.semantic_identifier} -
    - ); - if (document.link) { - return ( - - {display} - - ); - } else { - return ( -
    - {display} + + + + + } + onClick={() => handleFeedback("like")} + /> + + + } + onClick={() => handleFeedback("dislike")} + /> + + {regenerate && ( + + )} + +
    + ) : ( +
    + +
    + {includeMessageSwitcher && ( +
    + { + onMessageSelection( + otherMessagesCanSwitchTo[ + currentMessageInd - 1 + ] + ); + }} + handleNext={() => { + onMessageSelection( + otherMessagesCanSwitchTo[ + currentMessageInd + 1 + ] + ); + }} + /> +
    + )}
    - ); - } - })} + + + + + + } + onClick={() => handleFeedback("like")} + /> + + + + } + onClick={() => handleFeedback("dislike")} + /> + + {regenerate && ( + + )} +
    +
    + ))}
    - )} -
    - {handleFeedback && ( -
    - - handleFeedback("like")} - /> - handleFeedback("dislike")} - />
    - )} +
    @@ -414,9 +655,11 @@ function MessageSwitcher({ icon={FiChevronLeft} onClick={currentPage === 1 ? undefined : handlePrevious} /> - + + {currentPage} / {totalPages} + null, }: { + shared?: boolean; content: string; files?: FileDescriptor[]; messageId?: number | null; otherMessagesCanSwitchTo?: number[]; onEdit?: (editedContent: string) => void; onMessageSelection?: (messageId: number) => void; + stopGenerating?: () => void; }) => { const textareaRef = useRef(null); @@ -475,29 +722,21 @@ export const HumanMessage = ({ return (
    setIsHovered(true)} onMouseLeave={() => setIsHovered(false)} > -
    -
    -
    -
    -
    - -
    -
    - -
    You
    -
    -
    -
    - - - {isEditing ? ( -
    -
    +
    +
    + +
    +
    + {isEditing ? ( +
    +
    -