import json
from typing import Any

from pyairtable import Api as AirtableApi

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section


class AirtableClientNotSetUpError(PermissionError):
    """Raised when the connector is used before ``load_credentials`` ran."""

    def __init__(self) -> None:
        super().__init__("Airtable Client is not set up, was load_credentials called?")


class AirtableConnector(LoadConnector, PollConnector):
    """Index every record of a single Airtable table as one document each.

    Args:
        base_id: ID of the Airtable base that owns the table.
        table_name_or_id: Name or ID of the table to read.
        batch_size: Maximum number of documents yielded per batch.
    """

    def __init__(
        self,
        base_id: str,
        table_name_or_id: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.base_id = base_id
        self.table_name_or_id = table_name_or_id
        self.batch_size = batch_size
        # Populated by load_credentials(); None means "not authenticated yet".
        self.airtable_client: AirtableApi | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the API client from ``credentials["airtable_access_token"]``.

        Returns:
            Always ``None``; the credentials are used as given.

        Raises:
            KeyError: If ``airtable_access_token`` is absent from ``credentials``.
        """
        self.airtable_client = AirtableApi(credentials["airtable_access_token"])

        return None

    def _record_to_document(self, record: dict[str, Any]) -> Document:
        """Convert one raw Airtable record dict into a ``Document``."""
        # Index directly: a record without an id must fail loudly instead of
        # being stored under the literal document id "None".
        record_id = str(record["id"])

        # NOTE(review): the record-level URL is expected to resolve when
        # table_name_or_id is a table ID; with a table *name* it may not
        # (the table-only link had the same limitation) -- confirm.
        record_link = (
            f"https://airtable.com/{self.base_id}/{self.table_name_or_id}/{record_id}"
        )

        metadata: dict[str, str | list[str]] = {"type": "airtable"}
        created_time = record.get("createdTime")
        if created_time is not None:
            # Only set the key when a value exists so None never reaches metadata.
            metadata["created_time"] = str(created_time)

        return Document(
            id=record_id,
            sections=[
                Section(
                    link=record_link,
                    # ensure_ascii=False keeps non-ASCII field text readable
                    # (and searchable) instead of \uXXXX escapes; a missing
                    # "fields" entry becomes "{}" rather than "null".
                    text=json.dumps(record.get("fields") or {}, ensure_ascii=False),
                )
            ],
            source=DocumentSource.AIRTABLE,
            # Include the record id so documents from the same table do not
            # all share one identical title.
            semantic_identifier=(
                f"Airtable Base ID: {self.base_id}. "
                f"Table Name or ID: {self.table_name_or_id}. "
                f"Record ID: {record_id}"
            ),
            metadata=metadata,
        )

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Yield the table's records as batches of at most ``batch_size`` documents.

        ``start`` and ``end`` are accepted for interface compatibility but are
        not applied: every call re-reads the whole table.
        NOTE(review): presumably because the only timestamp read from a record
        is ``createdTime``, which cannot reveal later edits -- confirm before
        adding a time filter.

        Raises:
            AirtableClientNotSetUpError: On first iteration, if
                ``load_credentials`` has not been called.
        """
        if self.airtable_client is None:
            raise AirtableClientNotSetUpError()

        table = self.airtable_client.table(self.base_id, self.table_name_or_id)

        batch: list[Document] = []
        # iterate() pages through the table lazily, so the full table is never
        # held in memory at once.
        for page in table.iterate():
            for record in page:
                batch.append(self._record_to_document(record))
                if len(batch) >= self.batch_size:
                    yield batch
                    batch = []

        # Flush the final partial batch; an empty table yields nothing.
        if batch:
            yield batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Return a generator over all records in the table.

        Raises:
            AirtableClientNotSetUpError: Immediately (not lazily), if
                ``load_credentials`` has not been called.
        """
        # Checked here as well because poll_source is a generator function and
        # would only raise once the caller starts iterating.
        if self.airtable_client is None:
            raise AirtableClientNotSetUpError()
        return self.poll_source(None, None)
-58,6 +58,7 @@ def identify_connector_class( connector_map = { DocumentSource.WEB: WebConnector, DocumentSource.FILE: LocalFileConnector, + DocumentSource.AIRTABLE: AirtableConnector, DocumentSource.SLACK: { InputType.LOAD_STATE: SlackLoadConnector, InputType.POLL: SlackPollConnector, diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 5b9d57b9d35..fdc9f4768d4 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -74,3 +74,4 @@ zenpy==2.0.41 dropbox==11.36.2 boto3-stubs[s3]==1.34.133 ultimate_sitemap_parser==0.5 +pyairtable==3.0.0a3 \ No newline at end of file diff --git a/web/public/Airtable.png b/web/public/Airtable.png new file mode 100644 index 00000000000..7a25f56f9fc Binary files /dev/null and b/web/public/Airtable.png differ diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index b5e735b0e65..53be424eec2 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -88,6 +88,7 @@ import voyageIcon from "../../../public/Voyage.png"; import googleIcon from "../../../public/Google.webp"; import { FaRobot } from "react-icons/fa"; +import airtableIcon from "../../../public/Airtable.png"; export interface IconProps { size?: number; @@ -999,6 +1000,20 @@ export const LightSettingsIcon = ({ ); }; +export const AirtableIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ( +
+ Logo +
+ ); +}; + // // COMPANY LOGOS // @@ -2112,7 +2127,7 @@ export const CpuIcon = ({ diff --git a/web/src/lib/connectors/connectors.ts b/web/src/lib/connectors/connectors.ts index 7e56a498dcd..832c37d6c94 100644 --- a/web/src/lib/connectors/connectors.ts +++ b/web/src/lib/connectors/connectors.ts @@ -809,6 +809,28 @@ For example, specifying .*-support.* as a "channel" will cause the connector to }, ], }, + airtable: { + description: "Configure Airtable connector", + values: [ + { + type: "text", + query: "Enter the Airtable Base ID:", + label: "Base ID", + name: "base_id", + optional: false, + description: "The ID of the Airtable base you want to connect to.", + }, + { + type: "text", + query: "Enter the Airtable Table Name or ID:", + label: "Table Name or ID", + name: "table_name_or_id", + optional: false, + description: + "The name or ID of the specific table within the Airtable base.", + }, + ], + }, }; export function createConnectorInitialValues( connector: ConfigurableSources @@ -819,21 +841,18 @@ export function createConnectorInitialValues( name: "", groups: [], is_public: true, - ...configuration.values.reduce( - (acc, field) => { - if (field.type === "select") { - acc[field.name] = null; - } else if (field.type === "list") { - acc[field.name] = field.default || []; - } else if (field.type === "checkbox") { - acc[field.name] = field.default || false; - } else if (field.default !== undefined) { - acc[field.name] = field.default; - } - return acc; - }, - {} as { [record: string]: any } - ), + ...configuration.values.reduce((acc, field) => { + if (field.type === "select") { + acc[field.name] = null; + } else if (field.type === "list") { + acc[field.name] = field.default || []; + } else if (field.type === "checkbox") { + acc[field.name] = field.default || false; + } else if (field.default !== undefined) { + acc[field.name] = field.default; + } + return acc; + }, {} as { [record: string]: any }), }; } @@ -844,28 +863,25 @@ export function 
createConnectorValidationSchema( return Yup.object().shape({ name: Yup.string().required("Connector Name is required"), - ...configuration.values.reduce( - (acc, field) => { - let schema: any = - field.type === "select" - ? Yup.string() - : field.type === "list" - ? Yup.array().of(Yup.string()) - : field.type === "checkbox" - ? Yup.boolean() - : field.type === "file" - ? Yup.mixed() - : Yup.string(); + ...configuration.values.reduce((acc, field) => { + let schema: any = + field.type === "select" + ? Yup.string() + : field.type === "list" + ? Yup.array().of(Yup.string()) + : field.type === "checkbox" + ? Yup.boolean() + : field.type === "file" + ? Yup.mixed() + : Yup.string(); - if (!field.optional) { - schema = schema.required(`${field.label} is required`); - } + if (!field.optional) { + schema = schema.required(`${field.label} is required`); + } - acc[field.name] = schema; - return acc; - }, - {} as Record - ), + acc[field.name] = schema; + return acc; + }, {} as Record), // These are advanced settings indexingStart: Yup.string().nullable(), pruneFreq: Yup.number().min(0, "Prune frequency must be non-negative"), @@ -1060,3 +1076,8 @@ export interface MediaWikiConfig extends MediaWikiBaseConfig { } export interface WikipediaConfig extends MediaWikiBaseConfig {} + +export interface AirtableConfig { + base_id: string; + table_name_or_id: string; +} diff --git a/web/src/lib/connectors/credentials.ts b/web/src/lib/connectors/credentials.ts index 424a07c82fe..27d73a7b038 100644 --- a/web/src/lib/connectors/credentials.ts +++ b/web/src/lib/connectors/credentials.ts @@ -182,6 +182,10 @@ export interface AxeroCredentialJson { axero_api_token: string; } +export interface AirtableCredentialJson { + airtable_access_token: string; +} + export interface MediaWikiCredentialJson {} export interface WikipediaCredentialJson extends MediaWikiCredentialJson {} @@ -282,6 +286,7 @@ export const credentialTemplates: Record = { access_key_id: "", secret_access_key: "", } as 
OCICredentialJson, + airtable: { airtable_access_token: "" } as AirtableCredentialJson, google_sites: null, file: null, wikipedia: null, @@ -424,6 +429,9 @@ export const credentialDisplayNames: Record = { // Axero base_url: "Axero Base URL", axero_api_token: "Axero API Token", + + // Airtable + airtable_access_token: "Airtable Access Token", }; export function getDisplayNameForCredentialKey(key: string): string { return credentialDisplayNames[key] || key; diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index bbc63847adb..64ac2976163 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -36,6 +36,7 @@ import { OCIStorageIcon, GoogleStorageIcon, ColorSlackIcon, + AirtableIcon, } from "@/components/icons/icons"; import { ValidSources } from "./types"; import { @@ -277,6 +278,12 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Ingestion", category: SourceCategory.Other, }, + airtable: { + icon: AirtableIcon, + displayName: "Airtable", + category: SourceCategory.Wiki, + docs: "https://docs.danswer.dev/connectors/airtable", + }, // currently used for the Internet Search tool docs, which is why // a globe is used not_applicable: { diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 936e4e6c84c..312864f7320 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -212,6 +212,7 @@ export interface UserGroup { } const validSources = [ + "airtable", "web", "github", "gitlab",