-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(glean-backend): working indexer
- Loading branch information
Mark Shields
committed
Dec 17, 2024
1 parent
c23233e
commit 56affdc
Showing
21 changed files
with
905 additions
and
696 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,30 @@ | ||
import { SchedulerServiceTaskScheduleDefinitionConfig } from '@backstage/backend-plugin-api'; | ||
|
||
/**
 * Configuration shape for the Glean plugin, declared under the `glean` key.
 */
export interface Config {
  /**
   * Glean plugin configuration.
   */
  glean?: {
    /**
     * The index url of the Glean API
     * (for example `https://customer-be.glean.com/api/index/v1/bulkindexdocuments`).
     */
    apiIndexUrl: string;

    /**
     * The data source of the Glean API to use
     * See: https://support.glean.com/hc/en-us/articles/30038992119451-Data-Sources
     */
    datasource: string;

    /**
     * The api token
     * @visibility secret
     */
    token: string;

    /**
     * The Scheduler for how often to run Glean indexing
     */
    schedule?: SchedulerServiceTaskScheduleDefinitionConfig;
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
// This package should be installed as a `dev` dependency | ||
import { createBackend } from '@backstage/backend-defaults'; | ||
|
||
// Create the backend via createBackend from '@backstage/backend-defaults'. | ||
const backend = createBackend(); | ||
// Path to the file where the plugin is exported as default | ||
backend.add(import('../src')); | ||
// Start the backend after the plugin module has been added. | ||
backend.start(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
232 changes: 232 additions & 0 deletions
232
plugins/glean-backend/src/client/GleanIndexClient.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
import { getVoidLogger } from '@backstage/backend-common'; | ||
import { mockServices } from '@backstage/backend-test-utils'; | ||
import { catalogServiceMock } from '@backstage/plugin-catalog-node/testUtils'; | ||
import { Entity } from '@backstage/catalog-model'; | ||
import { ConfigReader } from '@backstage/config'; | ||
import { TechDocsMetadata } from '@backstage/plugin-techdocs-backend'; | ||
import { rest } from 'msw'; | ||
import { setupServer } from 'msw/node'; | ||
import { GleanIndexClient } from './GleanIndexClient'; | ||
import { htmlFixture } from './fixtures/staticTechDocsHtml'; | ||
import { GleanDocument } from './types'; | ||
|
||
/**
 * Tests for GleanIndexClient: construction, HTML parsing, document building
 * and batch indexing. HTTP calls to the Glean index URL are intercepted with
 * an msw server; TechDocs access is replaced with jest stubs.
 */
describe('GleanIndexClient', () => {
  let gleanIndexClient: GleanIndexClient;
  const server = setupServer();
  const discoveryApi = { getBaseUrl: jest.fn() };
  const gleanApiIndexUrl =
    'https://customer-be.glean.com/api/index/v1/bulkindexdocuments';
  const auth = mockServices.auth();

  const config = new ConfigReader({
    backend: {
      baseUrl: 'http://localhost',
      listen: { port: 7000 },
    },
    app: {
      baseUrl: 'http://localhost',
      listen: { port: 3000 },
    },
    glean: {
      apiIndexUrl: gleanApiIndexUrl,
      token: 'I-am-a-token',
      datasource: 'I-am-a-datasource',
    },
  });

  // `spec` is a top-level Entity field, so it is declared next to `metadata`
  // rather than nested inside it.
  const entityWithUrlRef: Entity = {
    apiVersion: 'backstage.io/v1alpha1',
    kind: 'Component',
    metadata: {
      name: 'some-handbook-with-url-ref',
      namespace: 'default',
      annotations: {
        'backstage.io/techdocs-ref': 'url:some_url',
      },
    },
    spec: {},
  };
  const entityWithDirRef: Entity = {
    apiVersion: 'backstage.io/v1alpha1',
    kind: 'Component',
    metadata: {
      name: 'some-handbook-with-dir-ref',
      namespace: 'default',
      annotations: {
        'backstage.io/techdocs-ref': 'dir:.',
      },
    },
    spec: {},
  };
  const entities = [entityWithUrlRef, entityWithDirRef];
  const catalogApi = catalogServiceMock({ entities });

  // Single place that builds a client from the shared test doubles above.
  const createClient = () =>
    GleanIndexClient.create({
      auth,
      catalogApi,
      config,
      discoveryApi,
      logger: getVoidLogger(),
    });

  beforeAll(() => server.listen());

  beforeEach(() => {
    gleanIndexClient = createClient();
  });

  afterEach(() => {
    jest.resetAllMocks();
    server.resetHandlers();
  });

  afterAll(() => server.close());

  describe('create', () => {
    it('returns a new instance of GleanIndexClient', () => {
      expect(createClient()).toBeInstanceOf(GleanIndexClient);
    });
  });

  describe('parseMainContent', () => {
    it('removes all nav elements from HTML', () => {
      // Guard: the fixture must contain a nav element for the check to mean anything.
      expect(htmlFixture).toEqual(expect.stringContaining('<nav'));
      // Bracket access reaches the non-public method from the test.
      // eslint-disable-next-line dot-notation
      expect(gleanIndexClient['parseMainContent'](htmlFixture)).toEqual(
        expect.not.stringContaining('<nav'),
      );
    });
  });

  describe('buildDocument', () => {
    beforeEach(() => {
      // Serve the static HTML fixture instead of fetching a TechDocs file.
      // eslint-disable-next-line dot-notation
      gleanIndexClient['techDocsClient'].getTechDocsStaticFile = jest
        .fn()
        .mockResolvedValue(htmlFixture);
    });

    it('returns a document object', async () => {
      expect(
        await gleanIndexClient.buildDocument(
          entityWithUrlRef,
          'foo/index.html',
        ),
      ).toEqual({
        id: 'default/component/some-handbook-with-url-ref/foo/index.html',
        title: 'Engineering Handbook',
        container: 'some-handbook-with-url-ref',
        datasource: 'I-am-a-datasource',
        viewURL:
          'http://localhost/docs/default/component/some-handbook-with-url-ref/foo',
        body: {
          mimeType: 'HTML',
          textContent: expect.stringContaining(
            "Welcome to Company's Engineering Handbook!",
          ),
        },
        // Expected value is in whole seconds (milliseconds divided by 1000).
        updatedAt: Math.floor(new Date('April 6, 2022').getTime() / 1000),
        permissions: { allowAnonymousAccess: true },
      });
    });
  });

  describe('batchIndexTechDocs', () => {
    const mockDocument: GleanDocument = {
      id: 'document-1',
      title: 'I am a document',
      container: 'some-handbook',
      datasource: 'I-am-a-datasource',
      viewURL: 'http://backstage.w10e.com',
      body: {
        mimeType: 'HTML',
        textContent: 'I am some text content',
      },
      updatedAt: 1652818028,
      permissions: { allowAnonymousAccess: true },
    };

    const mockTechDocsMetadata: TechDocsMetadata = {
      site_name: 'some-handbook',
      // Double quotes so the apostrophe does not terminate the string literal.
      site_description: "Company's Engineering Handbook",
      etag: '38cf6ed97f8c501426a0e311b76d67c69fc46df3',
      build_timestamp: 1652796973948,
      files: ['index.html', 'interviewing/index.html', 'onboarding.html'],
    };

    beforeEach(() => {
      // Stub document building and uploading so only the batching is under test.
      jest
        .spyOn(gleanIndexClient, 'buildDocument')
        .mockResolvedValue(mockDocument);
      jest
        .spyOn(gleanIndexClient, 'indexDocuments')
        .mockResolvedValue('response');

      // eslint-disable-next-line dot-notation
      gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
        .fn()
        .mockResolvedValue(mockTechDocsMetadata);

      // Answer any POST to the Glean index URL with HTTP 200.
      server.use(
        rest.post(gleanApiIndexUrl, (_req, res, ctx) => {
          return res(ctx.status(200));
        }),
      );
    });

    it('uploads the Glean documents', async () => {
      const batchCount = await gleanIndexClient.batchIndexDocuments(
        'upload-',
        [mockDocument],
      );
      expect(gleanIndexClient.indexDocuments).toHaveBeenCalledTimes(1);
      expect(batchCount).toEqual(1);
    });

    it('builds and uploads the Glean documents for all entities', async () => {
      const result = await gleanIndexClient.batchIndexTechDocs(entities);
      expect(result.uploadId).toContain('upload-');
      expect(result.batchCount).toEqual(1);
    });

    describe('when there are no files to index', () => {
      beforeEach(() => {
        // Same metadata as above, but with an empty file list.
        // eslint-disable-next-line dot-notation
        gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
          .fn()
          .mockResolvedValue({ ...mockTechDocsMetadata, files: [] });
      });

      it('does not index tech docs with Glean', async () => {
        // NOTE(review): an empty entity list is passed here, so the empty-files
        // metadata stub above is likely never exercised — confirm whether
        // `entities` was intended.
        const result = await gleanIndexClient.batchIndexTechDocs([]);
        expect(gleanIndexClient.buildDocument).not.toHaveBeenCalled();
        expect(result.uploadId).toContain('upload-');
        expect(result.batchCount).toEqual(0);
      });
    });
  });

  describe('batchIndex', () => {
    beforeEach(() => {
      // Replace the TechDocs indexing step with a canned result.
      jest.spyOn(gleanIndexClient, 'batchIndexTechDocs').mockResolvedValue({
        uploadId: 'upload-7bbf4c41-b73a-4ca2-8245-a23a0c4f37e7',
        batchCount: 1,
      });
    });

    it('indexes the TechDocs entities', async () => {
      await gleanIndexClient.batchIndex(entities);
      expect(gleanIndexClient.batchIndexTechDocs).toHaveBeenCalledTimes(1);
    });
  });
});
Oops, something went wrong.