From 8467c2166da4d1fc456fc3f3c10f9fcb9bfe1494 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Mon, 28 Oct 2024 16:13:20 -0700 Subject: [PATCH 1/4] added old error handling to comment fetching --- .../connectors/confluence/connector.py | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index fe52862982d..ec578c288c6 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -50,6 +50,10 @@ "restrictions.read.restrictions.group", ] +_NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = ( + "No parent or not permitted to view content with id" +) + class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector): def __init__( @@ -118,15 +122,21 @@ def _get_comment_string_for_page_id(self, page_id: str) -> str: comment_cql += self.cql_label_filter expand = ",".join(_COMMENT_EXPANSION_FIELDS) - for comments in self.confluence_client.paginated_cql_page_retrieval( - cql=comment_cql, - expand=expand, - ): - for comment in comments: - comment_string += "\nComment:\n" - comment_string += extract_text_from_confluence_html( - confluence_client=self.confluence_client, confluence_object=comment - ) + try: + for comments in self.confluence_client.paginated_cql_page_retrieval( + cql=comment_cql, + expand=expand, + ): + for comment in comments: + comment_string += "\nComment:\n" + comment_string += extract_text_from_confluence_html( + confluence_client=self.confluence_client, + confluence_object=comment, + ) + except Exception as e: + logger.exception("error fetching comments: \n") + if _NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR in str(e): + raise return comment_string From 52a2a8f33a3c6c700368bddd234e5afb522fd4f2 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Tue, 29 Oct 2024 06:34:08 -0700 Subject: [PATCH 2/4] Not --- backend/danswer/connectors/confluence/connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index ec578c288c6..997ad77a4b7 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -135,7 +135,7 @@ def _get_comment_string_for_page_id(self, page_id: str) -> str: ) except Exception as e: logger.exception("error fetching comments: \n") - if _NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR in str(e): + if _NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR not in str(e): raise return comment_string From ee47b9509a9b3a4588592ab336958f21bd034f5e Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Tue, 29 Oct 2024 07:28:01 -0700 Subject: [PATCH 3/4] properly escaped cql labels --- backend/danswer/connectors/confluence/connector.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 997ad77a4b7..663cec96ff0 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -95,12 +95,13 @@ def __init__( cql_page_query += f" and id='{page_id}'" self.cql_page_query = cql_page_query - self.cql_label_filter = "" self.cql_time_filter = "" + + self.cql_label_filter = "" if labels_to_skip: labels_to_skip = list(set(labels_to_skip)) - comma_separated_labels = ",".join(labels_to_skip) - self.cql_label_filter = f"&label not in ({comma_separated_labels})" + comma_separated_labels = ",".join(f"'{label}'" for label in labels_to_skip) + self.cql_label_filter = f" and label not in ({comma_separated_labels})" def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: # see https://github.com/atlassian-api/atlassian-python-api/blob/master/atlassian/rest_client.py From 6ee602c11ce5b9e5d463c96ee6ba0e9938af91d1 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Tue, 29 Oct 2024 08:33:09 -0700 Subject: [PATCH 4/4] reverted changes --- .../connectors/confluence/connector.py | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 663cec96ff0..f0945547e57 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -50,10 +50,6 @@ "restrictions.read.restrictions.group", ] -_NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = ( - "No parent or not permitted to view content with id" -) - class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector): def __init__( @@ -123,21 +119,16 @@ def _get_comment_string_for_page_id(self, page_id: str) -> str: comment_cql += self.cql_label_filter expand = ",".join(_COMMENT_EXPANSION_FIELDS) - try: - for comments in self.confluence_client.paginated_cql_page_retrieval( - cql=comment_cql, - expand=expand, - ): - for comment in comments: - comment_string += "\nComment:\n" - comment_string += extract_text_from_confluence_html( - confluence_client=self.confluence_client, - confluence_object=comment, - ) - except Exception as e: - logger.exception("error fetching comments: \n") - if _NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR not in str(e): - raise + for comments in self.confluence_client.paginated_cql_page_retrieval( + cql=comment_cql, + expand=expand, + ): + for comment in comments: + comment_string += "\nComment:\n" + comment_string += extract_text_from_confluence_html( + confluence_client=self.confluence_client, + confluence_object=comment, + ) return comment_string