Skip to content
This repository has been archived by the owner on Dec 11, 2024. It is now read-only.

Commit

Permalink
Match code with the latest upstream
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Co <alex.tuan@mindvalley.com>
  • Loading branch information
onimsha committed Sep 19, 2024
1 parent 55cb0d0 commit 829bdbd
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 97 deletions.
13 changes: 6 additions & 7 deletions backend/Dockerfile.model_server
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \
# Download tokenizers, distilbert for the Danswer model
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
# RUN python -c "from transformers import AutoTokenizer; \
# from huggingface_hub import snapshot_download; \
# snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
# snapshot_download('nomic-ai/nomic-embed-text-v1'); \
# from sentence_transformers import SentenceTransformer; \
# SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1');"

# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
# running Danswer, don't overwrite it with the built in cache folder
# RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface

WORKDIR /app

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Create Date: 2024-04-13 18:07:29.153817
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Create Date: 2024-04-15 01:36:02.952809
"""

import json
from typing import cast
from alembic import op
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Create Date: 2024-04-28 16:59:33.199153
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Create Date: 2024-04-25 17:05:09.695703
"""

from alembic import op

# revision identifiers, used by Alembic.
Expand Down
1 change: 1 addition & 0 deletions backend/danswer/auth/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ async def oauth_callback(
associate_by_email: bool = False,
is_verified_by_default: bool = False,
) -> models.UOAP:
verify_email_in_whitelist(account_email)
verify_email_domain(account_email)

user = await super().oauth_callback( # type: ignore
Expand Down
71 changes: 71 additions & 0 deletions backend/danswer/background/celery/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,77 @@ def on_setup_logging(
task_logger.propagate = False


class CeleryTaskPlainFormatter(PlainFormatter):
    """Plain-text formatter that tags records with the active Celery task.

    When formatting happens inside a running task, the record gains
    ``task_id`` and ``task_name`` attributes and its message is prefixed
    with ``[task_name(task_id)]``; outside a task, records pass through
    to the base formatter unchanged.
    """

    def format(self, record: logging.LogRecord) -> str:
        active = current_task
        if active and active.request:
            # Expose task identity on the record for downstream consumers.
            record.task_id = active.request.id
            record.task_name = active.name
            record.msg = f"[{active.name}({active.request.id})] {record.msg}"
        return super().format(record)


class CeleryTaskColoredFormatter(ColoredFormatter):
    """Colored formatter that tags records with the active Celery task.

    Mirrors ``CeleryTaskPlainFormatter``: inside a running task, the record
    gains ``task_id``/``task_name`` attributes and a ``[task_name(task_id)]``
    message prefix; otherwise the base formatter's output is unchanged.
    """

    def format(self, record: logging.LogRecord) -> str:
        active = current_task
        if active and active.request:
            # Expose task identity on the record for downstream consumers.
            record.task_id = active.request.id
            record.task_name = active.name
            record.msg = f"[{active.name}({active.request.id})] {record.msg}"
        return super().format(record)


@signals.setup_logging.connect
def on_setup_logging(
    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
) -> None:
    """Install custom handlers in place of celery's default logging setup.

    Connecting to celery's ``setup_logging`` signal suppresses celery's own
    logger configuration, letting us attach our formatters instead. Both the
    root (worker) logger and celery's task logger get a stream handler, plus
    a file handler when ``logfile`` is given.
    """
    # TODO: could unhardcode format and colorize and accept these as options from
    # celery's config
    fmt = "%(asctime)s %(filename)30s %(lineno)4s: %(message)s"
    datefmt = "%m/%d/%Y %I:%M:%S %p"

    # Reformat celery's worker (root) logger: colored output on the console,
    # plain output in the optional logfile.
    root_logger = logging.getLogger()

    root_stream_handler = logging.StreamHandler()
    root_stream_handler.setFormatter(ColoredFormatter(fmt, datefmt=datefmt))
    root_logger.addHandler(root_stream_handler)

    if logfile:
        root_file_handler = logging.FileHandler(logfile)
        root_file_handler.setFormatter(PlainFormatter(fmt, datefmt=datefmt))
        root_logger.addHandler(root_file_handler)

    root_logger.setLevel(loglevel)

    # Reformat celery's task logger: same layout, but the CeleryTask*
    # formatters prefix each record with the owning task's name and id.
    task_stream_handler = logging.StreamHandler()
    task_stream_handler.setFormatter(CeleryTaskColoredFormatter(fmt, datefmt=datefmt))
    task_logger.addHandler(task_stream_handler)

    if logfile:
        task_file_handler = logging.FileHandler(logfile)
        task_file_handler.setFormatter(CeleryTaskPlainFormatter(fmt, datefmt=datefmt))
        task_logger.addHandler(task_file_handler)

    task_logger.setLevel(loglevel)
    # Prevent duplicate output: without this, task records would also bubble
    # up to the root logger's handlers.
    task_logger.propagate = False


#####
# Celery Beat (Periodic Tasks) Settings
#####
Expand Down
8 changes: 3 additions & 5 deletions backend/danswer/connectors/google_site/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,9 @@ def load_from_state(self) -> GenerateDocumentsOutput:
semantic_identifier=title,
sections=[
Section(
link=(
(self.base_url.rstrip("/") + "/" + path.lstrip("/"))
if path
else ""
),
link=(self.base_url.rstrip("/") + "/" + path.lstrip("/"))
if path
else "",
text=parsed_html.cleaned_text,
)
],
Expand Down
3 changes: 0 additions & 3 deletions backend/danswer/db/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import NullPool

from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from danswer.configs.app_configs import LOG_POSTGRES_LATENCY
Expand Down Expand Up @@ -131,7 +130,6 @@ def init_sqlalchemy_engine(app_name: str) -> None:


def get_sqlalchemy_engine() -> Engine:
connect_args = {"sslmode": "disable"}
global _SYNC_ENGINE
if _SYNC_ENGINE is None:
connection_string = build_connection_string(
Expand All @@ -148,7 +146,6 @@ def get_sqlalchemy_engine() -> Engine:


def get_sqlalchemy_async_engine() -> AsyncEngine:
connect_args = {"ssl": "disable"}
global _ASYNC_ENGINE
if _ASYNC_ENGINE is None:
# underlying asyncpg cannot accept application_name directly in the connection string
Expand Down
77 changes: 0 additions & 77 deletions backend/danswer/llm/gpt_4_all.py

This file was deleted.

1 change: 1 addition & 0 deletions backend/danswer/tools/built_in_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def auto_add_search_tool_to_personas(db_session: Session) -> None:
db_session.commit()
logger.notice("Completed adding SearchTool to relevant Personas.")


_built_in_tools_cache: dict[int, Type[Tool]] | None = None


Expand Down
1 change: 0 additions & 1 deletion backend/requirements/default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dask==2023.8.1
ddtrace==2.6.5
distributed==2023.8.1
fastapi==0.109.2
fastapi-health==0.4.0
fastapi-users==12.1.3
fastapi-users-db-sqlalchemy==5.0.0
filelock==3.15.4
Expand Down

0 comments on commit 829bdbd

Please sign in to comment.