Skip to content

Commit

Permalink
Merge pull request #3032 from danswer-ai/freshdesk-cleanup
Browse files Browse the repository at this point in the history
Cleaned up connector
  • Loading branch information
hagen-danswer authored Nov 2, 2024
2 parents 2e49027 + 83c299e commit 5d9b836
Showing 1 changed file with 84 additions and 64 deletions.
148 changes: 84 additions & 64 deletions backend/danswer/connectors/freshdesk/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,76 +20,90 @@

logger = setup_logger()

# Prefix prepended to the ticket link when building a Document id.
_FRESHDESK_ID_PREFIX = "FRESHDESK_"


# Ticket keys that are copied into the document metadata; any ticket field
# whose key is not listed here is skipped when the metadata is built.
_TICKET_FIELDS_TO_INCLUDE = {
"fr_escalated",
"spam",
"priority",
"source",
"status",
"type",
"is_escalated",
"tags",
"nr_due_by",
"nr_escalated",
"cc_emails",
"fwd_emails",
"reply_cc_emails",
"ticket_cc_emails",
"support_email",
"to_emails",
}

# Maps a ticket's "source" number to a human-readable label.
# Keys are the *stringified* numbers, so callers must str() the value
# before looking it up.
_SOURCE_NUMBER_TYPE_MAP = {
"1": "Email",
"2": "Portal",
"3": "Phone",
"7": "Chat",
"9": "Feedback Widget",
"10": "Outbound Email",
}

# Maps a ticket's "priority" number (as a string key) to a human-readable label.
_PRIORITY_NUMBER_TYPE_MAP = {"1": "low", "2": "medium", "3": "high", "4": "urgent"}

# Maps a ticket's "status" number (as a string key) to a human-readable label.
_STATUS_NUMBER_TYPE_MAP = {"2": "open", "3": "pending", "4": "resolved", "5": "closed"}


def _create_metadata_from_ticket(ticket: dict) -> dict:
included_fields = {
"fr_escalated",
"spam",
"priority",
"source",
"status",
"type",
"is_escalated",
"tags",
"nr_due_by",
"nr_escalated",
"cc_emails",
"fwd_emails",
"reply_cc_emails",
"ticket_cc_emails",
"support_email",
"to_emails",
}

metadata = {}
email_data = {}
metadata: dict[str, str | list[str]] = {}
# Combine all emails into a list so there are no repeated emails
email_data: set[str] = set()

for key, value in ticket.items():
if (
key in included_fields
and value is not None
and value != []
and value != {}
and value != "[]"
and value != ""
):
value_to_str = (
[str(item) for item in value] if isinstance(value, List) else str(value)
)
if "email" in key:
email_data[key] = value_to_str
# Skip fields that aren't useful for embedding
if key not in _TICKET_FIELDS_TO_INCLUDE:
continue

# Skip empty fields
if not value or value == "[]":
continue

# Convert strings or lists to strings
stringified_value: str | list[str]
if isinstance(value, list):
stringified_value = [str(item) for item in value]
else:
stringified_value = str(value)

if "email" in key:
if isinstance(stringified_value, list):
email_data.update(stringified_value)
else:
metadata[key] = value_to_str
email_data.add(stringified_value)
else:
metadata[key] = stringified_value

if email_data:
metadata["email_data"] = str(email_data)

# Convert source to human-parsable string
source_types = {
"1": "Email",
"2": "Portal",
"3": "Phone",
"7": "Chat",
"9": "Feedback Widget",
"10": "Outbound Email",
}
if ticket.get("source"):
metadata["source"] = source_types.get(
str(ticket.get("source")), "Unknown Source Type"
metadata["emails"] = list(email_data)

# Convert source numbers to human-parsable string
if source_number := ticket.get("source"):
metadata["source"] = _SOURCE_NUMBER_TYPE_MAP.get(
str(source_number), "Unknown Source Type"
)

# Convert priority to human-parsable string
priority_types = {"1": "low", "2": "medium", "3": "high", "4": "urgent"}
if ticket.get("priority"):
metadata["priority"] = priority_types.get(
str(ticket.get("priority")), "Unknown Priority"
# Convert priority numbers to human-parsable string.
# The map is keyed by strings, so the value is stringified before the lookup
# (matching the source and status conversions); a numeric priority would
# otherwise never match a key and always fall through to "Unknown Priority".
if priority_number := ticket.get("priority"):
    metadata["priority"] = _PRIORITY_NUMBER_TYPE_MAP.get(
        str(priority_number), "Unknown Priority"
    )

# Convert status to human-parsable string
status_types = {"2": "open", "3": "pending", "4": "resolved", "5": "closed"}
if ticket.get("status"):
metadata["status"] = status_types.get(
str(ticket.get("status")), "Unknown Status"
if status_number := ticket.get("status"):
metadata["status"] = _STATUS_NUMBER_TYPE_MAP.get(
str(status_number), "Unknown Status"
)

due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00"))
Expand All @@ -99,17 +113,24 @@ def _create_metadata_from_ticket(ticket: dict) -> dict:


def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
# Use the ticket description as the text
text = f"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}"
metadata = _create_metadata_from_ticket(ticket)

# This is also used in the ID because it is more unique than just the ticket ID
link = f"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}"

return Document(
id=str(ticket["id"]),
id=_FRESHDESK_ID_PREFIX + link,
sections=[
Section(
link=f"https://{domain}.freshdesk.com/helpdesk/tickets/{int(ticket['id'])}",
text=f"description: {parse_html_page_basic(ticket.get('description_text', ''))}",
link=link,
text=text,
)
],
source=DocumentSource.FRESHDESK,
semantic_identifier=ticket["subject"],
metadata=_create_metadata_from_ticket(ticket),
metadata=metadata,
doc_updated_at=datetime.fromisoformat(
ticket["updated_at"].replace("Z", "+00:00")
),
Expand Down Expand Up @@ -146,7 +167,7 @@ def _fetch_tickets(
'include' field available for this endpoint:
https://developers.freshdesk.com/api/#filter_tickets
"""
if any(attr is None for attr in [self.api_key, self.domain, self.password]):
if self.api_key is None or self.domain is None or self.password is None:
raise ConnectorMissingCredentialError("freshdesk")

base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets"
Expand Down Expand Up @@ -187,7 +208,6 @@ def _process_tickets(

for ticket_batch in self._fetch_tickets(start, end):
for ticket in ticket_batch:
logger.info(_create_doc_from_ticket(ticket, self.domain))
doc_batch.append(_create_doc_from_ticket(ticket, self.domain))

if len(doc_batch) >= self.batch_size:
Expand Down

0 comments on commit 5d9b836

Please sign in to comment.