Skip to content

Commit

Permalink
feat(glean-backend): working indexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Shields committed Dec 17, 2024
1 parent c23233e commit 4a96f45
Show file tree
Hide file tree
Showing 21 changed files with 896 additions and 696 deletions.
3 changes: 0 additions & 3 deletions plugins/glean-backend/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Glean Backend

> NOTE: This is currently not in use anymore. However, it could be re-purposed
> to index other parts of Backstage (e.g. catalog entities, tools, etc.)
Welcome to the Glean backend plugin!

This backend plugin is used to make our Backstage content available in
Expand Down
17 changes: 15 additions & 2 deletions plugins/glean-backend/config.d.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,30 @@
import { SchedulerServiceTaskScheduleDefinitionConfig } from '@backstage/backend-plugin-api';

/**
 * Static configuration schema for the Glean backend plugin.
 */
export interface Config {
/**
* Glean plugin configuration.
*/
glean?: {
/**
* The index url of the Glean API
* (presumably the endpoint documents are posted to for indexing — confirm)
*/
apiIndexUrl: string;

/**
* NOTE(review): this field is named like the former "base url of the Glean API"
* setting and may be a leftover now that `apiIndexUrl` exists — confirm whether
* it should still be declared.
*/
apiBaseUrl: string;
/**
* The data source of the Glean API to use
* See: https://support.glean.com/hc/en-us/articles/30038992119451-Data-Sources
*/
datasource: string;

/**
* The api token
* @visibility secret
*/
token: string;

/**
* The Scheduler for how often to run Glean indexing
*/
schedule?: SchedulerServiceTaskScheduleDefinitionConfig;
};
}
7 changes: 7 additions & 0 deletions plugins/glean-backend/dev/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Development entry point: creates a backend, registers this plugin and starts it.
// `@backstage/backend-defaults` should be installed as a `dev` dependency.
import { createBackend } from '@backstage/backend-defaults';

const backend = createBackend();
// Path to the file where the plugin is exported as the default export
backend.add(import('../src'));
backend.start();
29 changes: 22 additions & 7 deletions plugins/glean-backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@
"postpack": "backstage-cli package postpack"
},
"dependencies": {
"@backstage/backend-common": "^0.21.7",
"@backstage/catalog-model": "^1.4.5",
"@backstage/config": "^1.2.0",
"@backstage/core-plugin-api": "^1.9.2",
"@backstage/plugin-techdocs-backend": "^1.10.13",
"@backstage/backend-common": "^0.25.0",
"@backstage/backend-plugin-api": "^1.0.2",
"@backstage/catalog-client": "^1.8.0",
"@backstage/catalog-model": "^1.7.1",
"@backstage/config": "^1.3.0",
"@backstage/core-plugin-api": "^1.10.1",
"@backstage/errors": "^1.2.5",
"@backstage/plugin-catalog-node": "^1.14.0",
"@backstage/plugin-techdocs": "^1.11.2",
"@backstage/plugin-techdocs-backend": "^1.11.3",
"@types/express": "*",
"@types/supertest": "^6.0.2",
"@types/uuid": "^9.0.8",
Expand All @@ -36,12 +41,22 @@
"lodash": "^4.17.21",
"node-fetch": "^2.7.0",
"node-html-parser": "^6.1.13",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-router-dom": "^7.0.2",
"uuid": "^9.0.1",
"winston": "^3.13.0",
"yn": "^5.0.0"
},
"devDependencies": {
"@backstage/cli": "^0.26.4",
"@backstage/backend-defaults": "^0.5.3",
"@backstage/backend-test-utils": "^1.1.0",
"@backstage/cli": "^0.29.2",
"@backstage/test-utils": "^1.7.2",
"@testing-library/dom": "^10.4.0",
"@testing-library/react": "^16.1.0",
"@types/jest": "^29.5.14",
"@types/react": "^19",
"@types/react-dom": "^19",
"msw": "^1.3.3",
"supertest": "^6.3.4"
},
Expand Down
232 changes: 232 additions & 0 deletions plugins/glean-backend/src/client/GleanIndexClient.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
import { getVoidLogger } from '@backstage/backend-common';
import { mockServices } from '@backstage/backend-test-utils';
import { catalogServiceMock } from '@backstage/plugin-catalog-node/testUtils';
import { Entity } from '@backstage/catalog-model';
import { ConfigReader } from '@backstage/config';
import { TechDocsMetadata } from '@backstage/plugin-techdocs-backend';
import { rest } from 'msw';
import { setupServer } from 'msw/node';
import { GleanIndexClient } from './GleanIndexClient';
import { htmlFixture } from './fixtures/staticTechDocsHtml';
import { GleanDocument } from './types';

describe('GleanIndexClient', () => {
// Glean bulk-index endpoint that the msw server intercepts in these tests.
const gleanApiIndexUrl =
  'https://customer-be.glean.com/api/index/v1/bulkindexdocuments';

// Shared collaborators handed to every GleanIndexClient created below.
const server = setupServer();
const discoveryApi = { getBaseUrl: jest.fn() };
const auth = mockServices.auth();

// Minimal app/backend/glean configuration for the client under test.
const config = new ConfigReader({
  backend: { baseUrl: 'http://localhost', listen: { port: 7000 } },
  app: { baseUrl: 'http://localhost', listen: { port: 3000 } },
  glean: {
    apiIndexUrl: gleanApiIndexUrl,
    token: 'I-am-a-token',
    datasource: 'I-am-a-datasource',
  },
});

// Client under test; assigned in the suite-level `beforeEach`.
let gleanIndexClient: GleanIndexClient;

/**
 * Component entity fixture whose `backstage.io/techdocs-ref` annotation uses
 * the `url:` form.
 */
const entityWithUrlRef: Entity = {
  apiVersion: 'backstage.io/v1alpha1',
  kind: 'Component',
  metadata: {
    name: 'some-handbook-with-url-ref',
    namespace: 'default',
    annotations: {
      'backstage.io/techdocs-ref': 'url:some_url',
    },
  },
  // `spec` belongs at the top level of the entity envelope, not inside `metadata`.
  spec: {},
};
/**
 * Component entity fixture whose `backstage.io/techdocs-ref` annotation uses
 * the `dir:` form.
 */
const entityWithDirRef: Entity = {
  apiVersion: 'backstage.io/v1alpha1',
  kind: 'Component',
  metadata: {
    name: 'some-handbook-with-dir-ref',
    namespace: 'default',
    annotations: {
      'backstage.io/techdocs-ref': 'dir:.',
    },
  },
  // `spec` belongs at the top level of the entity envelope, not inside `metadata`.
  spec: {},
};
// Both fixtures are registered with the mocked catalog service.
const entities = [entityWithUrlRef, entityWithDirRef];
const catalogApi = catalogServiceMock({ entities });

// Start the msw server once for the whole suite.
beforeAll(() => {
  server.listen();
});

// Every test gets a freshly created client.
beforeEach(() => {
  const clientOptions = {
    auth,
    catalogApi,
    config,
    discoveryApi,
    logger: getVoidLogger(),
  };
  gleanIndexClient = GleanIndexClient.create(clientOptions);
});

// Drop per-test mocks and request handlers so tests stay isolated.
afterEach(() => {
  jest.resetAllMocks();
  server.resetHandlers();
});

// Shut the msw server down after the last test.
afterAll(() => {
  server.close();
});

// Factory: `create` must hand back a GleanIndexClient instance.
describe('create', () => {
  it('returns a new instance of GleanIndexClient', () => {
    const created = GleanIndexClient.create({
      auth,
      catalogApi,
      config,
      discoveryApi,
      logger: getVoidLogger(),
    });

    expect(created).toBeInstanceOf(GleanIndexClient);
  });
});

// `parseMainContent` is reached through bracket access (hence the lint
// suppression below).
describe('parseMainContent', () => {
  it('removes all nav elements from HTML', () => {
    // Sanity check: the fixture has to contain a nav element to begin with.
    expect(htmlFixture).toEqual(expect.stringContaining('<nav'));

    // eslint-disable-next-line dot-notation
    const mainContent = gleanIndexClient['parseMainContent'](htmlFixture);

    expect(mainContent).toEqual(expect.not.stringContaining('<nav'));
  });
});

// `buildDocument` turns one TechDocs file of an entity into a Glean document.
describe('buildDocument', () => {
  beforeEach(() => {
    // Serve the static HTML fixture instead of fetching a real TechDocs file.
    const getStaticFileStub = jest.fn().mockResolvedValue(htmlFixture);
    // eslint-disable-next-line dot-notation
    gleanIndexClient['techDocsClient'].getTechDocsStaticFile =
      getStaticFileStub;
  });

  it('returns a document object', async () => {
    const builtDocument = await gleanIndexClient.buildDocument(
      entityWithUrlRef,
      'foo/index.html',
    );

    expect(builtDocument).toEqual({
      id: 'default/component/some-handbook-with-url-ref/foo/index.html',
      title: 'Engineering Handbook',
      container: 'some-handbook-with-url-ref',
      datasource: 'I-am-a-datasource',
      viewURL:
        'http://localhost/docs/default/component/some-handbook-with-url-ref/foo',
      body: {
        mimeType: 'HTML',
        textContent: expect.stringContaining(
          "Welcome to Company's Engineering Handbook!",
        ),
      },
      // Epoch seconds for the expected last-updated date.
      updatedAt: Math.floor(new Date('April 6, 2022').getTime() / 1000),
      permissions: { allowAnonymousAccess: true },
    });
  });
});

// Batch indexing: documents are built per TechDocs file and uploaded in batches.
// `buildDocument`, `indexDocuments` and the TechDocs metadata lookup are all
// stubbed, so these tests only cover the orchestration.
describe('batchIndexTechDocs', () => {
  // Document returned by the stubbed `buildDocument`.
  const mockDocument: GleanDocument = {
    id: 'document-1',
    title: 'I am a document',
    container: 'some-handbook',
    datasource: 'I-am-a-datasource',
    viewURL: 'http://backstage.w10e.com',
    body: {
      mimeType: 'HTML',
      textContent: 'I am some text content',
    },
    updatedAt: 1652818028,
    permissions: { allowAnonymousAccess: true },
  };

  // Metadata returned by the stubbed `getTechDocsMetadata`.
  const mockTechDocsMetadata: TechDocsMetadata = {
    site_name: 'some-handbook',
    // `&#x27;` is the HTML character reference for an apostrophe.
    site_description: 'Company&#x27;s Engineering Handbook',
    etag: '38cf6ed97f8c501426a0e311b76d67c69fc46df3',
    build_timestamp: 1652796973948,
    files: ['index.html', 'interviewing/index.html', 'onboarding.html'],
  };

  beforeEach(() => {
    jest
      .spyOn(gleanIndexClient, 'buildDocument')
      .mockResolvedValue(mockDocument);
    jest
      .spyOn(gleanIndexClient, 'indexDocuments')
      .mockResolvedValue('response');

    // eslint-disable-next-line dot-notation
    gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
      .fn()
      .mockResolvedValue(mockTechDocsMetadata);

    // Accept any upload sent to the Glean index endpoint.
    server.use(
      rest.post(`${gleanApiIndexUrl}`, (_req, res, ctx) => {
        return res(ctx.status(200));
      }),
    );
  });

  it('uploads the Glean documents', async () => {
    const indexTechDocs = await gleanIndexClient.batchIndexDocuments(
      'upload-',
      [mockDocument],
    );
    expect(gleanIndexClient.indexDocuments).toHaveBeenCalledTimes(1);
    expect(indexTechDocs).toEqual(1);
  });

  it('builds and uploads the Glean documents for all entities', async () => {
    const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs(
      entities,
    );
    expect(batchIndexTechDocs.uploadId).toContain('upload-');
    expect(batchIndexTechDocs.batchCount).toEqual(1);
  });

  describe('when there are no files to index', () => {
    beforeEach(() => {
      // eslint-disable-next-line dot-notation
      gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
        .fn()
        .mockResolvedValue({ ...mockTechDocsMetadata, files: [] });
    });

    it('does not index tech docs with Glean', async () => {
      // NOTE(review): an empty entity list is passed here, so the `files: []`
      // metadata stub above is presumably never consulted. Confirm whether
      // `entities` should be passed to exercise the "no files" path.
      const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs(
        [],
      );
      expect(gleanIndexClient.buildDocument).not.toHaveBeenCalled();
      expect(batchIndexTechDocs.uploadId).toContain('upload-');
      expect(batchIndexTechDocs.batchCount).toEqual(0);
    });
  });
});

// `batchIndex` is the top-level entry point; the TechDocs indexing step it
// delegates to is stubbed so that only the delegation is checked.
describe('batchIndex', () => {
  beforeEach(() => {
    const stubbedResult = {
      uploadId: 'upload-7bbf4c41-b73a-4ca2-8245-a23a0c4f37e7',
      batchCount: 1,
    };
    jest
      .spyOn(gleanIndexClient, 'batchIndexTechDocs')
      .mockResolvedValue(stubbedResult);
  });

  it('indexes the TechDocs entities', async () => {
    await gleanIndexClient.batchIndex(entities);

    expect(gleanIndexClient.batchIndexTechDocs).toHaveBeenCalledTimes(1);
  });
});
});
Loading

0 comments on commit 4a96f45

Please sign in to comment.