diff --git a/plugins/glean-backend/README.md b/plugins/glean-backend/README.md index 216b0b1..8899c03 100644 --- a/plugins/glean-backend/README.md +++ b/plugins/glean-backend/README.md @@ -1,8 +1,5 @@ # Glean Backend -> NOTE: This is currently not in use anymore. However, it could be re-purposed -> to index other parts of Backstage (e.g. catalog entities, tools, etc.) - Welcome to the Glean backend plugin! This backend plugin is used to make our Backstage content available in diff --git a/plugins/glean-backend/config.d.ts b/plugins/glean-backend/config.d.ts index 6471e19..c00ee2b 100644 --- a/plugins/glean-backend/config.d.ts +++ b/plugins/glean-backend/config.d.ts @@ -1,17 +1,30 @@ +import { SchedulerServiceTaskScheduleDefinitionConfig } from '@backstage/backend-plugin-api'; + export interface Config { /** * Glean plugin configuration. */ glean?: { /** - * The base url of the Glean API + * The index url of the Glean API + */ + apiIndexUrl: string; + + /** + * The data source of the Glean API to use + * See: https://support.glean.com/hc/en-us/articles/30038992119451-Data-Sources */ - apiBaseUrl: string; + datasource: string; /** * The api token * @visibility secret */ token: string; + + /** + * The Scheduler for how often to run Glean indexing + */ + schedule?: SchedulerServiceTaskScheduleDefinitionConfig; }; } diff --git a/plugins/glean-backend/dev/index.js b/plugins/glean-backend/dev/index.js new file mode 100644 index 0000000..fcb1af1 --- /dev/null +++ b/plugins/glean-backend/dev/index.js @@ -0,0 +1,7 @@ +// This package should be installed as a `dev` dependency +import { createBackend } from '@backstage/backend-defaults'; + +const backend = createBackend(); +// Path to the file where the plugin is export as default +backend.add(import('../src')); +backend.start(); diff --git a/plugins/glean-backend/package.json b/plugins/glean-backend/package.json index c39bbcd..5b2105e 100644 --- a/plugins/glean-backend/package.json +++ b/plugins/glean-backend/package.json @@ 
-23,11 +23,16 @@ "postpack": "backstage-cli package postpack" }, "dependencies": { - "@backstage/backend-common": "^0.21.7", - "@backstage/catalog-model": "^1.4.5", - "@backstage/config": "^1.2.0", - "@backstage/core-plugin-api": "^1.9.2", - "@backstage/plugin-techdocs-backend": "^1.10.13", + "@backstage/backend-common": "^0.25.0", + "@backstage/backend-plugin-api": "^1.0.2", + "@backstage/catalog-client": "^1.8.0", + "@backstage/catalog-model": "^1.7.1", + "@backstage/config": "^1.3.0", + "@backstage/core-plugin-api": "^1.10.1", + "@backstage/errors": "^1.2.5", + "@backstage/plugin-catalog-node": "^1.14.0", + "@backstage/plugin-techdocs": "^1.11.2", + "@backstage/plugin-techdocs-backend": "^1.11.3", "@types/express": "*", "@types/supertest": "^6.0.2", "@types/uuid": "^9.0.8", @@ -36,12 +41,22 @@ "lodash": "^4.17.21", "node-fetch": "^2.7.0", "node-html-parser": "^6.1.13", + "react": "^19.0.0", + "react-dom": "^19.0.0", + "react-router-dom": "^7.0.2", "uuid": "^9.0.1", - "winston": "^3.13.0", "yn": "^5.0.0" }, "devDependencies": { - "@backstage/cli": "^0.26.4", + "@backstage/backend-defaults": "^0.5.3", + "@backstage/backend-test-utils": "^1.1.0", + "@backstage/cli": "^0.29.2", + "@backstage/test-utils": "^1.7.2", + "@testing-library/dom": "^10.4.0", + "@testing-library/react": "^16.1.0", + "@types/jest": "^29.5.14", + "@types/react": "^19", + "@types/react-dom": "^19", "msw": "^1.3.3", "supertest": "^6.3.4" }, diff --git a/plugins/glean-backend/src/client/GleanIndexClient.test.ts b/plugins/glean-backend/src/client/GleanIndexClient.test.ts new file mode 100644 index 0000000..6798ba0 --- /dev/null +++ b/plugins/glean-backend/src/client/GleanIndexClient.test.ts @@ -0,0 +1,232 @@ +import { getVoidLogger } from '@backstage/backend-common'; +import { mockServices } from '@backstage/backend-test-utils'; +import { catalogServiceMock } from '@backstage/plugin-catalog-node/testUtils'; +import { Entity } from '@backstage/catalog-model'; +import { ConfigReader } from 
'@backstage/config'; +import { TechDocsMetadata } from '@backstage/plugin-techdocs-backend'; +import { rest } from 'msw'; +import { setupServer } from 'msw/node'; +import { GleanIndexClient } from './GleanIndexClient'; +import { htmlFixture } from './fixtures/staticTechDocsHtml'; +import { GleanDocument } from './types'; + +describe('GleanIndexClient', () => { + let gleanIndexClient: GleanIndexClient; + const server = setupServer(); + const discoveryApi = { getBaseUrl: jest.fn() }; + const gleanApiIndexUrl = + 'https://customer-be.glean.com/api/index/v1/bulkindexdocuments'; + const auth = mockServices.auth(); + + const config = new ConfigReader({ + backend: { + baseUrl: 'http://localhost', + listen: { port: 7000 }, + }, + app: { + baseUrl: 'http://localhost', + listen: { port: 3000 }, + }, + glean: { + apiIndexUrl: gleanApiIndexUrl, + token: 'I-am-a-token', + datasource: 'I-am-a-datasource', + }, + }); + + const entityWithUrlRef: Entity = { + apiVersion: 'backstage.io/v1alpha1', + kind: 'Component', + metadata: { + name: 'some-handbook-with-url-ref', + namespace: 'default', + annotations: { + 'backstage.io/techdocs-ref': 'url:some_url', + }, + spec: {}, + }, + }; + const entityWithDirRef: Entity = { + apiVersion: 'backstage.io/v1alpha1', + kind: 'Component', + metadata: { + name: 'some-handbook-with-dir-ref', + namespace: 'default', + annotations: { + 'backstage.io/techdocs-ref': 'dir:.', + }, + spec: {}, + }, + }; + const entities = [entityWithUrlRef, entityWithDirRef]; + const catalogApi = catalogServiceMock({ entities }); + + beforeAll(() => server.listen()); + + beforeEach(() => { + gleanIndexClient = GleanIndexClient.create({ + auth, + catalogApi, + config, + discoveryApi, + logger: getVoidLogger(), + }); + }); + + afterEach(() => { + jest.resetAllMocks(); + server.resetHandlers(); + }); + + afterAll(() => server.close()); + + describe('create', () => { + it('returns a new instance of GleanIndexClient', () => { + expect( + GleanIndexClient.create({ + auth, + 
catalogApi, + config, + discoveryApi, + logger: getVoidLogger(), + }), + ).toBeInstanceOf(GleanIndexClient); + }); + }); + + describe('parseMainContent', () => { + it('removes all nav elements from HTML', () => { + expect(htmlFixture).toEqual(expect.stringContaining(' { + beforeEach(() => { + // eslint-disable-next-line dot-notation + gleanIndexClient['techDocsClient'].getTechDocsStaticFile = jest + .fn() + .mockResolvedValue(htmlFixture); + }); + + it('returns a document object', async () => { + expect( + await gleanIndexClient.buildDocument( + entityWithUrlRef, + 'foo/index.html', + ), + ).toEqual({ + id: 'default/component/some-handbook-with-url-ref/foo/index.html', + title: 'Engineering Handbook', + container: 'some-handbook-with-url-ref', + datasource: 'I-am-a-datasource', + viewURL: + 'http://localhost/docs/default/component/some-handbook-with-url-ref/foo', + body: { + mimeType: 'HTML', + textContent: expect.stringContaining( + "Welcome to Company's Engineering Handbook!", + ), + }, + updatedAt: Math.floor(new Date('April 6, 2022').getTime() / 1000), + permissions: { allowAnonymousAccess: true }, + }); + }); + }); + + describe('batchIndexTechDocs', () => { + const mockDocument: GleanDocument = { + id: 'document-1', + title: 'I am a document', + container: 'some-handbook', + datasource: 'I-am-a-datasource', + viewURL: 'http://backstage.w10e.com', + body: { + mimeType: 'HTML', + textContent: 'I am some text content', + }, + updatedAt: 1652818028, + permissions: { allowAnonymousAccess: true }, + }; + + const mockTechDocsMetadata: TechDocsMetadata = { + site_name: 'some-handbook', + site_description: 'Company',s Engineering Handbook', + etag: '38cf6ed97f8c501426a0e311b76d67c69fc46df3', + build_timestamp: 1652796973948, + files: ['index.html', 'interviewing/index.html', 'onboarding.html'], + }; + + beforeEach(() => { + jest + .spyOn(gleanIndexClient, 'buildDocument') + .mockResolvedValue(mockDocument); + jest + .spyOn(gleanIndexClient, 'indexDocuments') + 
.mockResolvedValue('response'); + + // eslint-disable-next-line dot-notation + gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest + .fn() + .mockResolvedValue(mockTechDocsMetadata); + + server.use( + rest.post(`${gleanApiIndexUrl}`, (_req, res, ctx) => { + return res(ctx.status(200)); + }), + ); + }); + + it('uploads the Glean documents', async () => { + const indexTechDocs = await gleanIndexClient.batchIndexDocuments( + 'upload-', + [mockDocument], + ); + expect(gleanIndexClient.indexDocuments).toHaveBeenCalledTimes(1); + expect(indexTechDocs).toEqual(1); + }); + + it('builds and uploads the Glean documents for all entities', async () => { + const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs( + entities, + ); + expect(batchIndexTechDocs.uploadId).toContain('upload-'); + expect(batchIndexTechDocs.batchCount).toEqual(1); + }); + + describe('when there are no files to index', () => { + beforeEach(() => { + // eslint-disable-next-line dot-notation + gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest + .fn() + .mockResolvedValue({ ...mockTechDocsMetadata, files: [] }); + }); + + it('does not index tech docs with Glean', async () => { + const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs( + [], + ); + expect(gleanIndexClient.buildDocument).not.toHaveBeenCalled(); + expect(batchIndexTechDocs.uploadId).toContain('upload-'); + expect(batchIndexTechDocs.batchCount).toEqual(0); + }); + }); + }); + + describe('batchIndex', () => { + beforeEach(() => { + jest.spyOn(gleanIndexClient, 'batchIndexTechDocs').mockResolvedValue({ + uploadId: 'upload-7bbf4c41-b73a-4ca2-8245-a23a0c4f37e7', + batchCount: 1, + }); + }); + + it('indexes the TechDocs entities', async () => { + await gleanIndexClient.batchIndex(entities); + expect(gleanIndexClient.batchIndexTechDocs).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/plugins/glean-backend/src/client/GleanIndexClient.ts b/plugins/glean-backend/src/client/GleanIndexClient.ts new 
/**
 * Builds Glean documents out of TechDocs pages and uploads them to the Glean
 * bulk-indexing endpoint, one page of documents at a time.
 *
 * Configuration keys read at call time: `glean.apiIndexUrl`, `glean.token`
 * and `glean.datasource`.
 */
export class GleanIndexClient {
  /** Maximum number of documents sent in a single bulk-index request. */
  private static readonly BATCH_SIZE = 25;

  private techDocsClient: TechDocsClient;

  /** Factory that mirrors the constructor but takes a single options bag. */
  static create({
    auth,
    config,
    discoveryApi,
    catalogApi,
    logger,
  }: {
    auth: AuthService;
    config: Config;
    discoveryApi: DiscoveryApi;
    catalogApi: CatalogApi;
    logger: LoggerService;
  }) {
    return new GleanIndexClient(config, logger, discoveryApi, catalogApi, auth);
  }

  constructor(
    private readonly config: Config,
    private logger: LoggerService,
    discoveryApi: DiscoveryApi,
    catalogApi: CatalogApi,
    auth: AuthService,
  ) {
    this.techDocsClient = TechDocsClient.create({
      auth,
      config,
      discoveryApi,
      catalogApi,
      logger,
    });
  }

  /** Turns anything thrown into a printable message. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }

  /** Returns a fresh upload id of the form `upload-<uuid v4>`. */
  private generateUploadId(): string {
    return `upload-${uuidv4()}`;
  }

  /** Serializes the page back to HTML with every `<nav>` element removed. */
  private parseMainContent(rawHtml: string): string {
    const root = parse(rawHtml);
    root.querySelectorAll('nav').forEach(nav => nav.remove());
    return root.toString();
  }

  /**
   * Builds the Glean document for one TechDocs page of an entity.
   *
   * @param entity - Catalog entity that owns the page.
   * @param filePath - Path of the HTML file relative to the entity's docs root.
   * @returns The document, including the HTML body with navigation stripped.
   */
  async buildDocument(
    entity: Entity,
    filePath: string,
  ): Promise<GleanDocument> {
    let rawHtml: string | null;
    try {
      rawHtml = await this.techDocsClient.getTechDocsStaticFile(
        entity,
        filePath,
      );
    } catch (e) {
      this.logger.warn(
        `fetching static file ${filePath} for ${
          entity.metadata.name
        }: ${GleanIndexClient.describeError(e)}`,
      );
      // NOTE(review): a failed fetch still yields a document with an empty
      // body and a file-path-derived title - confirm this is intended.
      rawHtml = null;
    }

    const html = rawHtml ?? '';
    const textContent = this.parseMainContent(html);
    const title = this.techDocsClient.parseTitle(html) ?? startCase(filePath);
    // Glean receives the timestamp in epoch seconds.
    const updatedAt = Math.floor(
      this.techDocsClient.parseUpdatedAt(html).getTime() / 1000,
    );

    const partialDocument = {
      container: entity.metadata.name,
      datasource: this.config.getString('glean.datasource'),
      id: `${this.techDocsClient.getEntityUri(entity)}/${filePath}`,
      // these permissions allow anyone who can sign in to our Glean instance (via Okta SSO) to view the document
      permissions: { allowAnonymousAccess: true },
      title,
      updatedAt,
      viewURL: this.techDocsClient.getViewUrl(entity, filePath),
    };

    // The body is left out of the debug line to keep the log readable.
    this.logger.debug(`Building document: ${JSON.stringify(partialDocument)}`);

    return {
      ...partialDocument,
      body: {
        mimeType: 'HTML',
        textContent,
      },
    };
  }

  /** Builds the documents for all given files of one entity concurrently. */
  private async buildDocuments(entity: Entity, filesToBuild: Array<string>) {
    return Promise.all(
      filesToBuild.map((filePath: string) =>
        this.buildDocument(entity, filePath),
      ),
    );
  }

  /**
   * POSTs one page of documents to the configured Glean index URL.
   *
   * @param documents - Documents belonging to this page only.
   * @param isFirstPage - Whether this is the first page of the upload; also
   *   sent as `forceRestartUpload`.
   * @param isLastPage - Whether this is the final page of the upload.
   * @param uploadId - Identifier shared by every page of the same upload.
   * @returns The HTTP status text of a successful response.
   * @throws Error when the request fails or the response status is not OK.
   */
  async indexDocuments(
    documents: (GleanDocument | null)[],
    isFirstPage: boolean,
    isLastPage: boolean,
    uploadId: string,
  ): Promise<string> {
    try {
      const apiIndexUrl = this.config.getString('glean.apiIndexUrl');
      const response = await fetch(apiIndexUrl, {
        method: 'POST',
        headers: {
          Accept: 'application/json',
          'Content-Type': 'application/json',
          Authorization: `Bearer ${this.config.getString('glean.token')}`,
        },
        body: JSON.stringify({
          uploadId,
          isFirstPage,
          isLastPage,
          forceRestartUpload: isFirstPage,
          datasource: this.config.getString('glean.datasource'),
          documents,
        }),
      });
      if (!response.ok) {
        throw new Error(
          `POST ${apiIndexUrl} status: ${
            response.status
          }, body: ${await response.text()}`,
        );
      }
      return response.statusText;
    } catch (err) {
      // Always reject with an Error so callers never wait on a promise that
      // cannot settle, whatever was thrown underneath.
      const error =
        err instanceof Error
          ? err
          : new Error(GleanIndexClient.describeError(err));
      this.logger.error(error.message);
      throw error;
    }
  }

  /**
   * Uploads the documents in pages of at most `BATCH_SIZE`, one request after
   * another. A failed page does not stop the remaining pages.
   *
   * @param uploadId - Identifier shared by every page of this upload.
   * @param documents - All documents to upload.
   * @returns The number of pages attempted, or `null` if any page failed.
   */
  async batchIndexDocuments(
    uploadId: string,
    documents: (GleanDocument | null)[],
  ) {
    const allDocuments = documents ?? [];
    const batchSize = GleanIndexClient.BATCH_SIZE;
    const errors: Error[] = [];
    let batchCount = 0;

    for (let start = 0; start < allDocuments.length; start += batchSize) {
      // Each request carries only its own page, not the whole list.
      const page = allDocuments.slice(start, start + batchSize);
      const isFirstPage = start === 0;
      const isLastPage = start + batchSize >= allDocuments.length;
      try {
        const response = await this.indexDocuments(
          page,
          isFirstPage,
          isLastPage,
          uploadId,
        );
        this.logger.debug(
          `${uploadId} indexing batch ${batchCount}: ${response}`,
        );
      } catch (e) {
        errors.push(
          e instanceof Error
            ? e
            : new Error(GleanIndexClient.describeError(e)),
        );
      }
      batchCount++;
    }

    if (errors.length > 0) {
      this.logger.error(`${uploadId} indexing batch ${batchCount}: ${errors}`);
      return null;
    }
    this.logger.info(
      `${uploadId} successfully batch indexed ${allDocuments.length} documents in ${batchCount} batches`,
    );
    return batchCount;
  }

  /**
   * Builds and uploads the TechDocs pages of the given entities.
   *
   * Only `.html` files other than `404.html` listed in an entity's TechDocs
   * metadata are indexed; entities whose metadata cannot be fetched or lists
   * no such file are skipped with a warning.
   *
   * @param entities - Entities whose TechDocs should be indexed.
   * @returns The upload id and the number of pages attempted. `batchCount` is
   *   `0` when nothing was uploaded and `null` when at least one page failed.
   */
  async batchIndexTechDocs(entities: Entity[]) {
    const uploadId = this.generateUploadId();
    const entityUris = entities
      .map(e => this.techDocsClient.getEntityUri(e))
      .join(', ');
    this.logger.info(`${uploadId} batch indexing entities: ${entityUris}`);

    const documentsPerEntity = await Promise.all(
      entities.map(async entity => {
        let metadata;
        try {
          metadata = await this.techDocsClient.getTechDocsMetadata(entity);
        } catch (e) {
          this.logger.warn(
            `fetching metadata for ${
              entity.metadata.name
            }: ${GleanIndexClient.describeError(e)}`,
          );
          metadata = null;
        }
        const filesToIndex = metadata?.files?.filter(
          (filePath: string) =>
            filePath.endsWith('.html') && filePath !== '404.html',
        );
        // the entity has the techdocs-ref annotation but no files to index
        if (!filesToIndex || filesToIndex.length === 0) {
          this.logger.warn(`No files to index for ${entity.metadata.name}`);
          return null;
        }
        return this.buildDocuments(entity, filesToIndex);
      }),
    );

    const documentsToIndex = documentsPerEntity
      .flat()
      .filter(
        (document): document is GleanDocument =>
          document !== null && document !== undefined,
      );

    // Checked after flattening so that "entities present, but none with
    // indexable files" is treated the same as "no entities".
    if (documentsToIndex.length === 0) {
      this.logger.error(
        `${uploadId}: no documents to index for ${entityUris}, skipping bulk indexing`,
      );
      return { uploadId, batchCount: 0 };
    }

    let batchCount: number | null;
    try {
      batchCount = await this.batchIndexDocuments(uploadId, documentsToIndex);
    } catch (e) {
      this.logger.error(
        `batch indexing ${uploadId}: ${GleanIndexClient.describeError(e)}`,
      );
      batchCount = 0;
    }
    return { uploadId, batchCount };
  }

  /** Indexes everything this client knows how to index for the entities. */
  async batchIndex(entities: Entity[]) {
    // extend this method as there are more types of entities to index
    return await this.batchIndexTechDocs(entities);
  }
}
const entity: CompoundEntityRef = { - kind: 'component', - namespace: 'default', - name: 'some-handbook', + const entityWithTechdocsRef: Entity = { + apiVersion: 'backstage.io/v1alpha1', + kind: 'Component', + metadata: { + name: 'some-handbook', + namespace: 'default', + annotations: { + 'backstage.io/techdocs-ref': 'url:some_url', + }, + spec: {}, + }, + }; + const entityWithoutTechdocsRef: Entity = { + apiVersion: 'backstage.io/v1alpha1', + kind: 'Component', + metadata: { + name: 'some-handbook-no-techdocs-ref', + namespace: 'default', + spec: {}, + }, }; + const entities = [entityWithTechdocsRef, entityWithoutTechdocsRef]; + const catalogApi = catalogServiceMock({ entities }); beforeAll(() => server.listen()); beforeEach(() => { discoveryApi.getBaseUrl.mockResolvedValue(baseUrl); techDocsClient = TechDocsClient.create({ + auth, + catalogApi, config, discoveryApi, logger: getVoidLogger(), @@ -51,6 +74,8 @@ describe('TechDocsClient', () => { it('returns a new instance of techDocsClient', () => { expect( TechDocsClient.create({ + auth, + catalogApi, config, logger: getVoidLogger(), discoveryApi, @@ -61,7 +86,7 @@ describe('TechDocsClient', () => { describe('getEntityUri', () => { it('returns the entity URI', () => { - expect(techDocsClient.getEntityUri(entity)).toEqual( + expect(techDocsClient.getEntityUri(entityWithTechdocsRef)).toEqual( 'default/component/some-handbook', ); }); @@ -69,18 +94,46 @@ describe('TechDocsClient', () => { describe('getViewUrl', () => { it('returns docs view URL', () => { - expect(techDocsClient.getViewUrl(entity, 'foo/index.html')).toEqual( - 'http://localhost/docs/default/component/some-handbook/foo', - ); - expect(techDocsClient.getViewUrl(entity, 'foo/bar/index.html')).toEqual( + expect( + techDocsClient.getViewUrl(entityWithTechdocsRef, '/index.html'), + ).toEqual('http://localhost/docs/default/component/some-handbook/'); + expect( + techDocsClient.getViewUrl(entityWithTechdocsRef, 'foo/index.html'), + 
).toEqual('http://localhost/docs/default/component/some-handbook/foo'); + expect( + techDocsClient.getViewUrl(entityWithTechdocsRef, 'foo/bar/index.html'), + ).toEqual( 'http://localhost/docs/default/component/some-handbook/foo/bar', ); - expect(techDocsClient.getViewUrl(entity, 'foo/baz.html')).toEqual( + expect( + techDocsClient.getViewUrl(entityWithTechdocsRef, 'foo/baz.html'), + ).toEqual( 'http://localhost/docs/default/component/some-handbook/foo/baz', ); }); }); + describe('getTechDocsEntitiesResponse', () => { + describe('success', () => { + it('returns all techdocs entities response', async () => { + await expect( + techDocsClient.getTechDocsEntitiesResponse(), + ).resolves.toEqual({ + items: [entityWithTechdocsRef], + }); + }); + }); + }); + describe('getTechDocsEntities', () => { + describe('success', () => { + it('returns all techdocs entities', async () => { + await expect(techDocsClient.getTechDocsEntities()).resolves.toEqual([ + entityWithTechdocsRef, + ]); + }); + }); + }); + describe('getTechDocsMetadata', () => { describe('success', () => { const mockTechDocsMetadata: TechDocsMetadata = { @@ -94,7 +147,7 @@ describe('TechDocsClient', () => { beforeEach(() => { server.use( rest.get( - `${baseUrl}/default/component/some-handbook`, + `${baseUrl}/metadata/techdocs/default/component/some-handbook`, (_req, res, ctx) => { return res(ctx.status(200), ctx.json(mockTechDocsMetadata)); }, @@ -104,7 +157,7 @@ describe('TechDocsClient', () => { it('returns expected techdocs metadata', async () => { await expect( - techDocsClient.getTechDocsMetadata(entity), + techDocsClient.getTechDocsMetadata(entityWithTechdocsRef), ).resolves.toEqual(mockTechDocsMetadata); }); }); @@ -113,7 +166,7 @@ describe('TechDocsClient', () => { beforeEach(() => { server.use( rest.get( - `${baseUrl}/default/component/some-handbook`, + `${baseUrl}/metadata/techdocs/default/component/some-handbook-no-techdocs-ref`, (_req, res, ctx) => { return res( ctx.status(404), @@ -126,7 +179,7 @@ 
describe('TechDocsClient', () => { it('throws an error', async () => { await expect( - techDocsClient.getTechDocsMetadata(entity), + techDocsClient.getTechDocsMetadata(entityWithoutTechdocsRef), ).rejects.toThrow('Not Found'); }); }); @@ -141,7 +194,7 @@ describe('TechDocsClient', () => { beforeEach(() => { server.use( rest.get( - `${baseUrl}/default/component/some-handbook/${filePath}`, + `${baseUrl}/static/docs/default/component/some-handbook/${filePath}`, (_req, res, ctx) => { return res(ctx.status(200), ctx.text(mockTechDocsStaticFile)); }, @@ -151,7 +204,7 @@ describe('TechDocsClient', () => { it('returns expected techdocs metadata', async () => { await expect( - techDocsClient.getTechDocsStaticFile(entity, filePath), + techDocsClient.getTechDocsStaticFile(entityWithTechdocsRef, filePath), ).resolves.toEqual(mockTechDocsStaticFile); }); }); @@ -160,7 +213,7 @@ describe('TechDocsClient', () => { beforeEach(() => { server.use( rest.get( - `${baseUrl}/default/component/some-handbook/${filePath}`, + `${baseUrl}/static/docs/default/component/some-handbook/${filePath}`, (_req, res, ctx) => { return res( ctx.status(404), @@ -173,7 +226,7 @@ describe('TechDocsClient', () => { it('throws an error', async () => { await expect( - techDocsClient.getTechDocsStaticFile(entity, filePath), + techDocsClient.getTechDocsStaticFile(entityWithTechdocsRef, filePath), ).rejects.toThrow('Not Found'); }); }); @@ -184,7 +237,9 @@ describe('TechDocsClient', () => { const mockRawHtml = `
-

This is the title

+

+ "This is the title" + #

I am a file

April 6, 2022
@@ -201,14 +256,16 @@ describe('TechDocsClient', () => { const mockRawHtml = `
-

This is the title

+

+ "This is the title with & persand" + #

I am a file

April 6, 2022
/**
 * Client for the TechDocs backend and the catalog: lists entities that carry
 * a `backstage.io/techdocs-ref` annotation, fetches their TechDocs metadata
 * and static pages, and extracts a title and last-updated date from a page.
 */
export class TechDocsClient {
  private agent = new https.Agent({ keepAlive: false });

  /** Factory that mirrors the constructor but takes a single options bag. */
  static create({
    config,
    discoveryApi,
    logger,
    auth,
    catalogApi,
  }: {
    config: Config;
    discoveryApi: DiscoveryApi;
    catalogApi: CatalogApi;
    logger: LoggerService;
    auth: AuthService;
  }) {
    return new TechDocsClient(config, discoveryApi, logger, auth, catalogApi);
  }

  constructor(
    private readonly config: Config,
    private readonly discoveryApi: DiscoveryApi,
    private logger: LoggerService,
    private readonly auth: AuthService,
    private readonly catalogApi: CatalogApi,
  ) {}

  /** Returns `<techdocs base URL>/metadata/techdocs/<path>`. */
  async getTechDocsMetadataUrl(path: string = '') {
    return `${await this.discoveryApi.getBaseUrl(
      'techdocs',
    )}/metadata/techdocs/${path}`;
  }

  /** Returns `<techdocs base URL>/static/docs/<path>`. */
  async getTechDocsStaticUrl(path: string = '') {
    return `${await this.discoveryApi.getBaseUrl(
      'techdocs',
    )}/static/docs/${path}`;
  }

  /**
   * Returns the lower-cased `namespace/kind/name` triple of an entity, using
   * the default namespace when the entity does not declare one.
   */
  getEntityUri(entity: Entity) {
    return `${entity.metadata.namespace ?? DEFAULT_NAMESPACE}/${entity.kind}/${
      entity.metadata.name
    }`.toLowerCase();
  }

  /**
   * Returns the frontend URL of a docs page: `app.baseUrl`, `/docs`, the
   * entity URI and the file path without its trailing `index.html` / `.html`.
   *
   * Only a suffix is stripped, so `.html` elsewhere in the path is kept, and
   * a root-level `index.html` maps to the docs root just like `/index.html`.
   */
  getViewUrl(entity: Entity, filePath: string) {
    const docsUrl = `${this.config.getString('app.baseUrl')}/docs`;
    const entityUrl = `${docsUrl}/${this.getEntityUri(entity)}`;
    const pagePath = filePath.replace(/(?:^|\/)index\.html$|\.html$/, '');
    return `${entityUrl}/${pagePath}`;
  }

  /** Returns the catalog entities that have a TechDocs annotation. */
  async getTechDocsEntities(): Promise<Entity[]> {
    const response = await this.getTechDocsEntitiesResponse();
    return response?.items;
  }

  /**
   * Queries the catalog, authenticated as this service, for every entity on
   * which the `backstage.io/techdocs-ref` annotation exists.
   */
  async getTechDocsEntitiesResponse(): Promise<GetEntitiesResponse> {
    const token = await this.getPluginToken('catalog');
    return this.catalogApi.getEntities(
      {
        filter: {
          'metadata.annotations.backstage.io/techdocs-ref':
            CATALOG_FILTER_EXISTS,
        },
      },
      { token },
    );
  }

  /**
   * Fetches the TechDocs metadata of an entity.
   *
   * @throws Error carrying the response status text when the status is not
   *   OK, or the underlying failure when the request cannot be made.
   */
  async getTechDocsMetadata(entity: Entity): Promise<TechDocsMetadata> {
    const token = await this.getPluginToken('techdocs');
    const url = await this.getTechDocsMetadataUrl(this.getEntityUri(entity));
    this.logger.debug(`getTechDocsMetadata fetch URL ${url}`);
    const response = await this.fetchOrThrow(url, 'application/json', token);
    // The payload is trusted to have the TechDocsMetadata shape; it is not
    // validated here.
    return (await response.json()) as TechDocsMetadata;
  }

  /**
   * Fetches one generated TechDocs file of an entity as text.
   *
   * @param entity - Entity that owns the docs.
   * @param filePath - File path relative to the entity's docs root.
   * @throws Error carrying the response status text when the status is not
   *   OK, or the underlying failure when the request cannot be made.
   */
  async getTechDocsStaticFile(
    entity: Entity,
    filePath: string,
  ): Promise<string> {
    const token = await this.getPluginToken('techdocs');
    const url = await this.getTechDocsStaticUrl(
      `${this.getEntityUri(entity)}/${filePath}`,
    );
    this.logger.debug(`getTechDocsStaticFile fetch URL ${url}`);
    const response = await this.fetchOrThrow(url, 'text/plain', token);
    return response.text();
  }

  /**
   * Reads the page's "last updated" date from the
   * git-revision-date-localized element. Falls back to the current time when
   * the element is missing or its text is not a parseable date.
   */
  parseUpdatedAt(rawHtml: string): Date {
    const root = parse(rawHtml);
    const updatedAtDateString = root.querySelector(
      '.git-revision-date-localized-plugin.git-revision-date-localized-plugin-date',
    )?.rawText;
    if (updatedAtDateString) {
      const parsed = new Date(updatedAtDateString);
      // An unparseable string gives an Invalid Date whose getTime() is NaN.
      if (!Number.isNaN(parsed.getTime())) {
        return parsed;
      }
    }
    return new Date();
  }

  /**
   * Returns the text of the first `<h1>`, without quote characters and the
   * permalink markers `¶` / `#`, or `undefined` when there is no `<h1>`.
   */
  parseTitle(rawHtml: string): string | undefined {
    const root = parse(rawHtml);
    const h1 = root.querySelector('h1');
    // NOTE(review): the previous code replaced '&' with '&' (a no-op) and
    // stripped '"'. Presumably these were `&amp;` / `&quot;` literals, since
    // rawText is expected to be undecoded - confirm against node-html-parser.
    // Both the entity and the literal quote are handled here; `&amp;` is
    // decoded last so the text is only decoded once.
    return h1?.rawText
      .replace(/&quot;|"/g, '')
      .replace(/[¶#]/g, '')
      .replace(/&amp;/g, '&')
      .trim();
  }

  /** Obtains a token for calling the given plugin as this service. */
  private async getPluginToken(targetPluginId: string): Promise<string> {
    const { token } = await this.auth.getPluginRequestToken({
      onBehalfOf: await this.auth.getOwnServiceCredentials(),
      targetPluginId,
    });
    return token;
  }

  /** GETs `url` with a bearer token and throws unless the status is OK. */
  private async fetchOrThrow(url: string, accept: string, token: string) {
    const response = await fetch(url, {
      method: 'GET',
      headers: {
        Accept: accept,
        Authorization: `Bearer ${token}`,
      },
      // Pass the agent as the `agent` option; spreading it only copied the
      // Agent's own properties into the options. It is an https agent, so it
      // is offered for https URLs only and http URLs keep the default.
      agent: (parsedUrl: URL) =>
        parsedUrl.protocol === 'https:' ? this.agent : undefined,
    });
    if (!response.ok) {
      throw new Error(response.statusText);
    }
    return response;
  }
}
  • - - Welcome to Wealthsimple Engineering + + Welcome to Company Engineering
  • @@ -80,7 +80,7 @@ export const htmlFixture = `
    • - Welcome to Wealthsimple Engineering + Welcome to Company Engineering
    • @@ -152,8 +152,8 @@ export const htmlFixture = `

      Stack Guides

      @@ -231,7 +231,7 @@ export const htmlFixture = ` href="#engineering-architecture" title="Permanent link">¶

      Interviewing

      Management


      @@ -261,13 +261,13 @@ export const htmlFixture = `