From 46d9697166af5524a4fdd7abb255a017e3c5039b Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 31 Oct 2024 10:16:07 -0700 Subject: [PATCH 1/5] k --- .../tests/pruning/website/css/flexslider.css | 226 ------------------ 1 file changed, 226 deletions(-) delete mode 100644 backend/tests/integration/tests/pruning/website/css/flexslider.css diff --git a/backend/tests/integration/tests/pruning/website/css/flexslider.css b/backend/tests/integration/tests/pruning/website/css/flexslider.css deleted file mode 100644 index 6088235631c..00000000000 --- a/backend/tests/integration/tests/pruning/website/css/flexslider.css +++ /dev/null @@ -1,226 +0,0 @@ -/* - * jQuery FlexSlider v2.0 - * http://www.woothemes.com/flexslider/ - * - * Copyright 2012 WooThemes - * Free to use under the GPLv2 license. - * http://www.gnu.org/licenses/gpl-2.0.html - * - * Contributing author: Tyler Smith (@mbmufffin) - */ - -/* Browser Resets */ -.flex-container a:active, -.flexslider a:active, -.flex-container a:focus, -.flexslider a:focus { - outline: none; -} -.slides, -.flex-control-nav, -.flex-direction-nav { - margin: 0; - padding: 0; - list-style: none; -} - -/* FlexSlider Necessary Styles -*********************************/ -.flexslider { - margin: 0; - padding: 0; -} -.flexslider .slides > li { - display: none; - -webkit-backface-visibility: hidden; -} /* Hide the slides before the JS is loaded. Avoids image jumping */ -.flexslider .slides img { - width: 100%; - display: block; -} -.flex-pauseplay span { - text-transform: capitalize; -} - -/* Clearfix for the .slides element */ -.slides:after { - content: "."; - display: block; - clear: both; - visibility: hidden; - line-height: 0; - height: 0; -} -html[xmlns] .slides { - display: block; -} -* html .slides { - height: 1%; -} - -/* No JavaScript Fallback */ -/* If you are not using another script, such as Modernizr, make sure you - * include js that eliminates this class on page load */ -.no-js .slides > li:first-child { - display: block; -} - -/* FlexSlider Default Theme -*********************************/ -.flexslider { - background: none; - position: relative; - zoom: 1; -} -.flex-viewport { - max-height: 2000px; - -webkit-transition: all 1s ease; - -moz-transition: all 1s ease; - transition: all 1s ease; -} -.loading .flex-viewport { - max-height: 300px; -} -.flexslider .slides { - zoom: 1; -} - -.carousel li { - margin-right: 5px; -} - -/* Caption style */ - -.flex-caption { - background: rgba(0, 0, 0, 0.8); - margin-left: 5px; - bottom: 5px; - position: absolute; - padding: 20px; - z-index: 99; -} -.flex-caption p { - font-size: 14px !important; - line-height: 22px; - font-weight: 300; - color: #fff; -} -.flex-caption h2, -.flex-caption h4 { - color: #fff; -} - -/* Direction Nav */ -.flex-direction-nav { - *height: 0; -} -.flex-direction-nav a { - width: 30px; - height: 40px; - margin: 0; - display: block; - background: url(../img/bg_direction_nav.png) no-repeat 0 0; - position: absolute; - top: 45%; - z-index: 10; - cursor: pointer; - text-indent: -9999px; - opacity: 0; - -webkit-transition: all 0.3s ease; -} -.flex-direction-nav .flex-next { - background-position: 100% 0; - right: -36px; -} -.flex-direction-nav .flex-prev { - left: -36px; -} -.flexslider:hover .flex-next { - opacity: 0.8; - right: 5px; -} -.flexslider:hover .flex-prev { - opacity: 0.8; - left: 5px; -} -.flexslider:hover .flex-next:hover, -.flexslider:hover .flex-prev:hover { - opacity: 1; -} -.flex-direction-nav .flex-disabled { - opacity: 0.3 !important; - filter: alpha(opacity=30); - 
cursor: default; -} - -/* Control Nav */ -.flex-control-nav { - width: 100%; - position: absolute; - bottom: 0; - text-align: center; -} -.flex-control-nav li { - margin: 0 6px; - display: inline-block; - zoom: 1; - *display: inline; -} -.flex-control-paging li a { - width: 11px; - height: 11px; - display: block; - background: #666; - background: rgba(0, 0, 0, 0.5); - cursor: pointer; - text-indent: -9999px; - -webkit-border-radius: 20px; - -moz-border-radius: 20px; - -o-border-radius: 20px; - border-radius: 20px; - box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3); -} -.flex-control-paging li a:hover { - background: #333; - background: rgba(0, 0, 0, 0.7); -} -.flex-control-paging li a.flex-active { - background: #000; - background: rgba(0, 0, 0, 0.9); - cursor: default; -} - -.flex-control-thumbs { - margin: 5px 0 0; - position: static; - overflow: hidden; -} -.flex-control-thumbs li { - width: 25%; - float: left; - margin: 0; -} -.flex-control-thumbs img { - width: 100%; - display: block; - opacity: 0.7; - cursor: pointer; -} -.flex-control-thumbs img:hover { - opacity: 1; -} -.flex-control-thumbs .flex-active { - opacity: 1; - cursor: default; -} - -@media screen and (max-width: 860px) { - .flex-direction-nav .flex-prev { - opacity: 1; - left: 0; - } - .flex-direction-nav .flex-next { - opacity: 1; - right: 0; - } -} From a0736eaf411c6bd66ae540636e0fa584d497a1f0 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 31 Oct 2024 16:02:30 -0700 Subject: [PATCH 2/5] k --- .../danswer/connectors/zendesk/connector.py | 419 +++++++++++------- backend/requirements/default.txt | 1 - 2 files changed, 269 insertions(+), 151 deletions(-) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index f85f2efff57..7a2491c91e6 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -1,10 +1,8 @@ +from collections.abc import Iterator from typing import Any +from typing import Dict import requests -from retry import retry -from zenpy import Zenpy # type: ignore -from zenpy.lib.api_objects import Ticket # type: ignore -from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS @@ -20,43 +18,244 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.retry_wrapper import retry_builder -def _article_to_document(article: Article, content_tags: dict[str, str]) -> Document: - author = BasicExpertInfo( - display_name=article.author.name, email=article.author.email +MAX_PAGE_SIZE = 30 + + +class ZendeskCredentialsNotSetUpError(PermissionError): + def __init__(self) -> None: + super().__init__( + "Zendesk Credentials are not set up, was load_credentials called?" 
+ ) + + +class ZendeskClient: + def __init__(self, subdomain: str, email: str, token: str): + self.base_url = f"https://{subdomain}.zendesk.com/api/v2" + self.auth = (f"{email}/token", token) + + @retry_builder() + def make_request(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]: + response = requests.get( + f"{self.base_url}/{endpoint}", auth=self.auth, params=params + ) + response.raise_for_status() + return response.json() + + +def _get_content_tag_mapping(client: ZendeskClient) -> dict[str, str]: + content_tags: dict[str, str] = {} + params = {"page[size]": MAX_PAGE_SIZE} + + try: + while True: + data = client.make_request("guide/content_tags", params) + + for tag in data.get("records", []): + content_tags[tag["id"]] = tag["name"] + + # Check if there are more pages + if data.get("meta", {}).get("has_more", False): + params["page[after]"] = data["meta"]["after_cursor"] + else: + break + + return content_tags + except Exception as e: + raise Exception(f"Error fetching content tags: {str(e)}") + + +def _get_articles( + client: ZendeskClient, start_time: int | None = None, page_size: int = MAX_PAGE_SIZE +) -> Iterator[Dict[str, Any]]: + params = ( + {"start_time": start_time, "page[size]": page_size} + if start_time + else {"page[size]": page_size} + ) + + while True: + data = client.make_request("help_center/articles", params) + for article in data["articles"]: + yield article + + if not data.get("meta", {}).get("has_more"): + break + params["page[after]"] = data["meta"]["after_cursor"] + + +def _get_tickets( + client: ZendeskClient, start_time: int | None = None +) -> Iterator[Dict[str, Any]]: + params = {"start_time": start_time} if start_time else {"start_time": 0} + + while True: + data = client.make_request("incremental/tickets.json", params) + for ticket in data["tickets"]: + yield ticket + + if not data.get("end_of_stream", False): + params["start_time"] = data["end_time"] + else: + break + + +def _fetch_author(client: ZendeskClient, author_id: str) -> BasicExpertInfo | None: + author_data = client.make_request(f"users/{author_id}", {}) + user = author_data.get("user") + return ( + BasicExpertInfo(display_name=user.get("name"), email=user.get("email")) + if user and user.get("name") and user.get("email") + else None ) - update_time = time_str_to_utc(article.updated_at) - # build metadata + +def _article_to_document( + article: dict[str, Any], + content_tags: dict[str, str], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, +) -> tuple[dict[str, BasicExpertInfo] | None, Document]: + author_id = article.get("author_id") + if not author_id: + author = None + else: + author = ( + author_map.get(author_id) + if author_id in author_map + else _fetch_author(client, author_id) + ) + + new_author_mapping = {author_id: author} if author_id and author else None + + updated_at = article.get("updated_at") + update_time = time_str_to_utc(updated_at) if updated_at else None + + # Build metadata metadata: dict[str, str | list[str]] = { - "labels": [str(label) for label in article.label_names if label], + "labels": [str(label) for label in article.get("label_names", []) if label], "content_tags": [ content_tags[tag_id] - for tag_id in article.content_tag_ids + for tag_id in article.get("content_tag_ids", []) if tag_id in content_tags ], } - # remove empty values + # Remove empty values metadata = {k: v for k, v in metadata.items() if v} - return Document( - id=f"article:{article.id}", + return new_author_mapping, Document( + id=f"article:{article['id']}", sections=[ 
- Section(link=article.html_url, text=parse_html_page_basic(article.body)) + Section( + link=article.get("html_url"), + text=parse_html_page_basic(article["body"]), + ) ], source=DocumentSource.ZENDESK, - semantic_identifier=article.title, + semantic_identifier=article["title"], doc_updated_at=update_time, - primary_owners=[author], + primary_owners=[author] if author else None, metadata=metadata, ) -class ZendeskClientNotSetUpError(PermissionError): - def __init__(self) -> None: - super().__init__("Zendesk Client is not set up, was load_credentials called?") +def _get_comment_text( + comment: dict[str, Any], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, +) -> tuple[dict[str, BasicExpertInfo] | None, str]: + author_id = comment.get("author_id") + if not author_id: + author = None + else: + author = ( + author_map.get(author_id) + if author_id in author_map + else _fetch_author(client, author_id) + ) + + new_author_mapping = {author_id: author} if author_id and author else None + + comment_text = f"Comment{' by ' + author.display_name if author and author.display_name else ''}" + comment_text += f"{' at ' + comment['created_at'] if comment.get('created_at') else ''}:\n{comment['body']}" + + return new_author_mapping, comment_text + + +def _ticket_to_document( + ticket: dict[str, Any], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, + default_subdomain: str, +) -> tuple[dict[str, BasicExpertInfo] | None, Document]: + submitter_id = ticket.get("submitter") + if not submitter_id: + submitter = None + else: + submitter = ( + author_map.get(submitter_id) + if submitter_id in author_map + else _fetch_author(client, submitter_id) + ) + + new_author_mapping = ( + {submitter_id: submitter} if submitter_id and submitter else None + ) + + updated_at = ticket.get("updated_at") + update_time = time_str_to_utc(updated_at) if updated_at else None + + metadata: dict[str, str | list[str]] = {} + if status := ticket.get("status"): + metadata["status"] = status + if priority := ticket.get("priority"): + metadata["priority"] = priority + if tags := ticket.get("tags"): + metadata["tags"] = tags + if ticket_type := ticket.get("type"): + metadata["ticket_type"] = ticket_type + + # Fetch comments for the ticket + comments_data = client.make_request(f"tickets/{ticket.get('id')}/comments", {}) + comments = comments_data.get("comments", []) + + comment_texts = [] + for comment in comments: + new_author_mapping, comment_text = _get_comment_text( + comment, author_map, client + ) + if new_author_mapping: + author_map.update(new_author_mapping) + comment_texts.append(comment_text) + + comments_text = "\n\n".join(comment_texts) + + subject = ticket.get("subject") + full_text = f"Ticket Subject:\n{subject}\n\nComments:\n{comments_text}" + + ticket_url = ticket.get("url") + subdomain = ( + ticket_url.split("//")[1].split(".zendesk.com")[0] + if ticket_url + else default_subdomain + ) + + ticket_display_url = ( + f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.get('id')}" + ) + + return new_author_mapping, Document( + id=f"zendesk_ticket_{ticket['id']}", + sections=[Section(link=ticket_display_url, text=full_text)], + source=DocumentSource.ZENDESK, + semantic_identifier=f"Ticket #{ticket['id']}: {subject or 'No Subject'}", + doc_updated_at=update_time, + primary_owners=[submitter] if submitter else None, + metadata=metadata, + ) class ZendeskConnector(LoadConnector, PollConnector): @@ -66,44 +265,10 @@ def __init__( content_type: str = "articles", ) -> None: self.batch_size = 
batch_size - self.zendesk_client: Zenpy | None = None - self.content_tags: dict[str, str] = {} self.content_type = content_type - - @retry(tries=3, delay=2, backoff=2) - def _set_content_tags( - self, subdomain: str, email: str, token: str, page_size: int = 30 - ) -> None: - # Construct the base URL - base_url = f"https://{subdomain}.zendesk.com/api/v2/guide/content_tags" - - # Set up authentication - auth = (f"{email}/token", token) - - # Set up pagination parameters - params = {"page[size]": page_size} - - try: - while True: - # Make the GET request - response = requests.get(base_url, auth=auth, params=params) - - # Check if the request was successful - if response.status_code == 200: - data = response.json() - content_tag_list = data.get("records", []) - for tag in content_tag_list: - self.content_tags[tag["id"]] = tag["name"] - - # Check if there are more pages - if data.get("meta", {}).get("has_more", False): - params["page[after]"] = data["meta"]["after_cursor"] - else: - break - else: - raise Exception(f"Error: {response.status_code}\n{response.text}") - except Exception as e: - raise Exception(f"Error fetching content tags: {str(e)}") + self.subdomain = "" + # Fetch all tags ahead of time + self.content_tags: dict[str, str] = {} def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: # Subdomain is actually the whole URL @@ -112,87 +277,23 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None .replace("https://", "") .split(".zendesk.com")[0] ) + self.subdomain = subdomain - self.zendesk_client = Zenpy( - subdomain=subdomain, - email=credentials["zendesk_email"], - token=credentials["zendesk_token"], - ) - self._set_content_tags( - subdomain, - credentials["zendesk_email"], - credentials["zendesk_token"], + self.client = ZendeskClient( + subdomain, credentials["zendesk_email"], credentials["zendesk_token"] ) return None def load_from_state(self) -> GenerateDocumentsOutput: return self.poll_source(None, None) - def _ticket_to_document(self, ticket: Ticket) -> Document: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() - - owner = None - if ticket.requester and ticket.requester.name and ticket.requester.email: - owner = [ - BasicExpertInfo( - display_name=ticket.requester.name, email=ticket.requester.email - ) - ] - update_time = time_str_to_utc(ticket.updated_at) if ticket.updated_at else None - - metadata: dict[str, str | list[str]] = {} - if ticket.status is not None: - metadata["status"] = ticket.status - if ticket.priority is not None: - metadata["priority"] = ticket.priority - if ticket.tags: - metadata["tags"] = ticket.tags - if ticket.type is not None: - metadata["ticket_type"] = ticket.type - - # Fetch comments for the ticket - comments = self.zendesk_client.tickets.comments(ticket=ticket) - - # Combine all comments into a single text - comments_text = "\n\n".join( - [ - f"Comment{f' by {comment.author.name}' if comment.author and comment.author.name else ''}" - f"{f' at {comment.created_at}' if comment.created_at else ''}:\n{comment.body}" - for comment in comments - if comment.body - ] - ) - - # Combine ticket description and comments - description = ( - ticket.description - if hasattr(ticket, "description") and ticket.description - else "" - ) - full_text = f"Ticket Description:\n{description}\n\nComments:\n{comments_text}" - - # Extract subdomain from ticket.url - subdomain = ticket.url.split("//")[1].split(".zendesk.com")[0] - - # Build the html url for the ticket - ticket_url = 
f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.id}" - - return Document( - id=f"zendesk_ticket_{ticket.id}", - sections=[Section(link=ticket_url, text=full_text)], - source=DocumentSource.ZENDESK, - semantic_identifier=f"Ticket #{ticket.id}: {ticket.subject or 'No Subject'}", - doc_updated_at=update_time, - primary_owners=owner, - metadata=metadata, - ) - def poll_source( self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() + if self.client is None: + raise ZendeskCredentialsNotSetUpError() + + self.content_tags = _get_content_tag_mapping(self.client) if self.content_type == "articles": yield from self._poll_articles(start) @@ -204,26 +305,32 @@ def poll_source( def _poll_articles( self, start: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - articles = ( - self.zendesk_client.help_center.articles(cursor_pagination=True) # type: ignore - if start is None - else self.zendesk_client.help_center.articles.incremental( # type: ignore - start_time=int(start) - ) - ) + articles = _get_articles(self.client, start_time=int(start) if start else None) + author_map: dict[ + str, BasicExpertInfo + ] = ( + {} + ) # This one is built on the fly as there may be more many more authors than tags + doc_batch = [] for article in articles: if ( - article.body is None - or article.draft + article.get("body") is None + or article.get("draft") or any( label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS - for label in article.label_names + for label in article.get("label_names", []) ) ): continue - doc_batch.append(_article_to_document(article, self.content_tags)) + new_author_map, documents = _article_to_document( + article, self.content_tags, author_map, self.client + ) + if new_author_map: + author_map.update(new_author_map) + + doc_batch.append(documents) if len(doc_batch) >= self.batch_size: yield doc_batch doc_batch.clear() @@ -234,10 +341,14 @@ def _poll_articles( def _poll_tickets( self, start: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() + if self.client is None: + raise ZendeskCredentialsNotSetUpError() - ticket_generator = self.zendesk_client.tickets.incremental(start_time=start) + author_map: dict[str, BasicExpertInfo] = {} + + ticket_generator = _get_tickets( + self.client, start_time=int(start) if start else None + ) while True: doc_batch = [] @@ -246,10 +357,20 @@ def _poll_tickets( ticket = next(ticket_generator) # Check if the ticket status is deleted and skip it if so - if ticket.status == "deleted": + if ticket.get("status") == "deleted": continue - doc_batch.append(self._ticket_to_document(ticket)) + new_author_map, documents = _ticket_to_document( + ticket=ticket, + author_map=author_map, + client=self.client, + default_subdomain=self.subdomain, + ) + + if new_author_map: + author_map.update(new_author_map) + + doc_batch.append(documents) if len(doc_batch) >= self.batch_size: yield doc_batch @@ -266,16 +387,14 @@ def _poll_tickets( if __name__ == "__main__": - import os - import time - connector = ZendeskConnector() + connector = ZendeskConnector(content_type="tickets") connector.load_credentials( { - "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"], - "zendesk_email": os.environ["ZENDESK_EMAIL"], - "zendesk_token": os.environ["ZENDESK_TOKEN"], + "zendesk_subdomain": "https://dansweraisupport.zendesk.com", + "zendesk_email": "yuhong@danswer.ai", + "zendesk_token": 
"LGtvNnAcXRi5DTCt9yo7X6b6w6cg5cgw5A8LgjKl", } ) diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index fe0cdd01547..c06ab2ca7cf 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -75,7 +75,6 @@ uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 asana==5.0.8 -zenpy==2.0.41 dropbox==11.36.2 boto3-stubs[s3]==1.34.133 stripe==10.12.0 From 96f91ad634cfa391ae3cea966c19c5c3eb66f729 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 31 Oct 2024 16:03:42 -0700 Subject: [PATCH 3/5] k --- backend/danswer/connectors/zendesk/connector.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index 7a2491c91e6..dbb455830e2 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -1,6 +1,5 @@ from collections.abc import Iterator from typing import Any -from typing import Dict import requests @@ -37,7 +36,7 @@ def __init__(self, subdomain: str, email: str, token: str): self.auth = (f"{email}/token", token) @retry_builder() - def make_request(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]: + def make_request(self, endpoint: str, params: dict[str, Any]) -> dict[str, Any]: response = requests.get( f"{self.base_url}/{endpoint}", auth=self.auth, params=params ) @@ -69,7 +68,7 @@ def _get_content_tag_mapping(client: ZendeskClient) -> dict[str, str]: def _get_articles( client: ZendeskClient, start_time: int | None = None, page_size: int = MAX_PAGE_SIZE -) -> Iterator[Dict[str, Any]]: +) -> Iterator[dict[str, Any]]: params = ( {"start_time": start_time, "page[size]": page_size} if start_time @@ -88,7 +87,7 @@ def _get_articles( def _get_tickets( client: ZendeskClient, start_time: int | None = None -) -> Iterator[Dict[str, Any]]: +) -> Iterator[dict[str, Any]]: params = {"start_time": start_time} if start_time else {"start_time": 0} while True: From 43ecdcac07b8d858f0305359978a3a1dc9ff3ddc Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 31 Oct 2024 16:04:58 -0700 Subject: [PATCH 4/5] k --- backend/danswer/connectors/zendesk/connector.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index dbb455830e2..a0cd73ad10c 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -20,7 +20,7 @@ from danswer.utils.retry_wrapper import retry_builder -MAX_PAGE_SIZE = 30 +MAX_PAGE_SIZE = 30 # Zendesk API maximum class ZendeskCredentialsNotSetUpError(PermissionError): @@ -386,14 +386,15 @@ def _poll_tickets( if __name__ == "__main__": + import os import time - connector = ZendeskConnector(content_type="tickets") + connector = ZendeskConnector() connector.load_credentials( { - "zendesk_subdomain": "https://dansweraisupport.zendesk.com", - "zendesk_email": "yuhong@danswer.ai", - "zendesk_token": "LGtvNnAcXRi5DTCt9yo7X6b6w6cg5cgw5A8LgjKl", + "zendesk_subdomain": os.environ["ZENDESK_SUBDOMAIN"], + "zendesk_email": os.environ["ZENDESK_EMAIL"], + "zendesk_token": os.environ["ZENDESK_TOKEN"], } ) From ff3fc85f0b59e7edc9b4c865fb4631a1c15fe618 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Thu, 31 Oct 2024 16:08:44 -0700 Subject: [PATCH 5/5] k --- backend/danswer/connectors/zendesk/connector.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git 
a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py
index a0cd73ad10c..195fe45af0f 100644
--- a/backend/danswer/connectors/zendesk/connector.py
+++ b/backend/danswer/connectors/zendesk/connector.py
@@ -305,11 +305,9 @@ def _poll_articles(
         self, start: SecondsSinceUnixEpoch | None
     ) -> GenerateDocumentsOutput:
         articles = _get_articles(self.client, start_time=int(start) if start else None)
-        author_map: dict[
-            str, BasicExpertInfo
-        ] = (
-            {}
-        )  # This one is built on the fly as there may be more many more authors than tags
+
+        # This one is built on the fly as there may be many more authors than tags
+        author_map: dict[str, BasicExpertInfo] = {}
 
         doc_batch = []
         for article in articles:
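
For reference, the sketch below is a minimal, standalone illustration of the two pagination styles this series adopts in place of zenpy: cursor pagination (page[size] / page[after] / meta.has_more) for Help Center articles and content tags, and the incremental export (start_time / end_time / end_of_stream) for tickets. It mirrors the endpoints, auth scheme, and parameters of the ZendeskClient introduced in patch 2, assumes ZENDESK_SUBDOMAIN, ZENDESK_EMAIL, and ZENDESK_TOKEN are exported in the environment, and is not part of the patch series itself. Dropping zenpy in favor of these raw requests calls keeps retry behavior in one place (retry_builder on ZendeskClient.make_request) and removes the zenpy pin from requirements/default.txt.

# Minimal sketch, not part of the patch series. It mirrors the endpoints,
# auth scheme, and pagination parameters used by ZendeskClient above and
# assumes ZENDESK_SUBDOMAIN, ZENDESK_EMAIL, and ZENDESK_TOKEN are exported.
import os
from collections.abc import Iterator
from typing import Any

import requests

SUBDOMAIN = os.environ["ZENDESK_SUBDOMAIN"]
BASE_URL = f"https://{SUBDOMAIN}.zendesk.com/api/v2"
AUTH = (f"{os.environ['ZENDESK_EMAIL']}/token", os.environ["ZENDESK_TOKEN"])


def _get(endpoint: str, params: dict[str, Any]) -> dict[str, Any]:
    # Single authenticated GET; the connector wraps the equivalent call in retry_builder().
    response = requests.get(f"{BASE_URL}/{endpoint}", auth=AUTH, params=params)
    response.raise_for_status()
    return response.json()


def iter_articles(page_size: int = 30) -> Iterator[dict[str, Any]]:
    # Cursor pagination: follow meta.after_cursor while meta.has_more is true.
    params: dict[str, Any] = {"page[size]": page_size}
    while True:
        data = _get("help_center/articles", params)
        yield from data["articles"]
        if not data.get("meta", {}).get("has_more"):
            break
        params["page[after]"] = data["meta"]["after_cursor"]


def iter_tickets(start_time: int = 0) -> Iterator[dict[str, Any]]:
    # Incremental export: advance start_time to end_time until end_of_stream.
    params: dict[str, Any] = {"start_time": start_time}
    while True:
        data = _get("incremental/tickets.json", params)
        yield from data["tickets"]
        if data.get("end_of_stream", False):
            break
        params["start_time"] = data["end_time"]


if __name__ == "__main__":
    print(f"articles: {sum(1 for _ in iter_articles())}")
    print(f"tickets:  {sum(1 for _ in iter_tickets())}")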