Skip to content

Commit

Permalink
fix(bulk-import): handle Location entities created from other sources…
Browse files Browse the repository at this point in the history
… like the GH discovery as Import candidates [RHIDP-4021] (#2178)

* Display the resolved GH URLs w/o placeholders

This helps troubleshoot cache hits or misses.

* Make sure not to override all request headers passed to Octokit clients

* Load potential Imports from catalog Location entities

This covers the case of repos added
from other sources like the auto-discovery plugin

* Consider only as valid Import candidates repos that are accessible from the configured GH integrations

This should solve the issue of repos added through the auto-discovery plugin
(which seems to rely anyway on the configured GH integrations).
And for the ones coming from other sources (like app-config),
we won't show up those that are not accessible from the GH integrations
(case of any public repos out there added in some app-config.yaml).
Because ultimately, the source of truth of the bulk import right now
is the repos and orgs accessible from the GH integrations.
  • Loading branch information
rm3l authored Sep 16, 2024
1 parent b264558 commit 170ec74
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 24 deletions.
39 changes: 37 additions & 2 deletions plugins/bulk-import-backend/src/helpers/catalogInfoGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import { PluginEndpointDiscovery } from '@backstage/backend-common';
import { AuthService, DiscoveryService } from '@backstage/backend-plugin-api';
import { CatalogApi } from '@backstage/catalog-client';
import { LocationEntity } from '@backstage/catalog-model';
import type { Config } from '@backstage/config';

import gitUrlParse from 'git-url-parse';
Expand Down Expand Up @@ -134,12 +135,13 @@ ${jsYaml.dump(generatedEntity.entity)}`,
async listCatalogUrlLocationsById(
config: Config,
search?: string,
_pageNumber: number = DefaultPageNumber,
_pageSize: number = DefaultPageSize,
pageNumber: number = DefaultPageNumber,
pageSize: number = DefaultPageSize,
): Promise<{ id?: string; target: string }[]> {
const result = await Promise.all([
this.listCatalogUrlLocationsFromConfig(config, search),
this.listCatalogUrlLocationsByIdFromLocationsEndpoint(search),
this.listCatalogUrlLocationEntitiesById(search, pageNumber, pageSize),
]);
return result.flat();
}
Expand Down Expand Up @@ -196,6 +198,39 @@ ${jsYaml.dump(generatedEntity.entity)}`,
return this.filterLocations(res, search);
}

async listCatalogUrlLocationEntitiesById(
search?: string,
_pageNumber: number = DefaultPageNumber,
_pageSize: number = DefaultPageSize,
): Promise<{ id?: string; target: string }[]> {
const result = await this.catalogApi.getEntities(
{
filter: {
kind: 'Location',
},
// There is no query parameter to find entities with target URLs containing a string.
// The existing filter does an exact matching. That's why we are retrieving this hard-coded high number of Locations.
limit: 1000,
offset: 0,
},
{
token: await getTokenForPlugin(this.auth, 'catalog'),
},
);
const locations = (result?.items ?? []) as LocationEntity[];
const res = locations
.filter(
location => location.spec?.target && location.spec?.type === 'url',
)
.map(location => {
return {
id: location.metadata.uid,
target: location.spec.target!,
};
});
return this.filterLocations(res, search);
}

private filterLocations(
res: { id: string | undefined; target: string }[],
search: string | undefined,
Expand Down
89 changes: 79 additions & 10 deletions plugins/bulk-import-backend/src/service/githubApiService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,34 +109,40 @@ export class GithubApiService {
}

private registerHooks(octokit: Octokit) {
const extractCacheKey = (options: any) => {
const toFinalUrl = (options: any) => {
// options.url might contain placeholders => need to replace them with their actual values to not get colliding keys
const finalUrl = options.url.replace(/{([^}]+)}/g, (_: any, key: any) => {
return options.url.replace(/{([^}]+)}/g, (_: any, key: any) => {
return options[key] ?? `{${key}}`; // Replace with actual value, or leave unchanged if not found
});
return `${options.method}--${finalUrl}`;
};

const extractCacheKey = (options: any) =>
`${options.method}--${toFinalUrl(options)}`;

octokit.hook.before('request', async options => {
const headers: any = {};
if (!options.headers) {
options.headers = {
accept: 'application/json',
'user-agent': 'rhdh/bulk-import',
};
}
// Use ETag from in-memory cache if available
const cacheKey = extractCacheKey(options);
const existingEtag = await this.cache
.get(cacheKey)
.then(val => (val as any)?.etag);
?.then(val => (val as any)?.etag);
if (existingEtag) {
headers['If-None-Match'] = existingEtag;
options.headers['If-None-Match'] = existingEtag;
} else {
this.logger.debug(`cache miss for key "${cacheKey}"`);
}
options.headers = headers;
});

octokit.hook.after('request', async (response, options) => {
this.logger.debug(
`[GH API] ${options.method} ${options.url}: ${response.status}`,
`[GH API] ${options.method} ${toFinalUrl(options)}: ${response.status}`,
);
// If we get a 200 OK, the resource has changed, so update the in-memory cache
// If we get a successful response, the resource has changed, so update the in-memory cache
const cacheKey = extractCacheKey(options);
await this.cache.set(
cacheKey,
Expand All @@ -150,7 +156,7 @@ export class GithubApiService {

octokit.hook.error('request', async (error: any, options) => {
this.logger.debug(
`[GH API] ${options.method} ${options.url}: ${error.status}`,
`[GH API] ${options.method} ${toFinalUrl(options)}: ${error.status}`,
);
if (error.status !== 304) {
throw error;
Expand Down Expand Up @@ -353,6 +359,69 @@ export class GithubApiService {
};
}

async filterLocationsAccessibleFromIntegrations(
locationUrls: string[],
): Promise<string[]> {
const locationGitOwnerMap = new Map<string, string>();
for (const locationUrl of locationUrls) {
const split = locationUrl.split('/blob/');
if (split.length < 2) {
continue;
}
locationGitOwnerMap.set(locationUrl, gitUrlParse(split[0]).owner);
}

const ghConfigs = this.verifyAndGetIntegrations();
const credentialsByConfig =
await this.getCredentialsFromIntegrations(ghConfigs);
const allAccessibleAppOrgs = new Set<string>();
const allAccessibleTokenOrgs = new Set<string>();
const allAccessibleUsernames = new Set<string>();
for (const [ghConfig, credentials] of credentialsByConfig) {
for (const credential of credentials) {
const octokit = this.buildOcto(
{ credential, errors: undefined },
ghConfig.apiBaseUrl,
);
if (!octokit) {
continue;
}
if (isGithubAppCredential(credential)) {
const appOrgMap = await this.getAllAppOrgs(
ghConfig,
credential.accountLogin,
);
for (const [_, ghOrg] of appOrgMap) {
allAccessibleAppOrgs.add(ghOrg.name);
}
} else {
// find authenticated GitHub owner...
const username = (await octokit.rest.users.getAuthenticated())?.data
?.login;
if (username) {
allAccessibleUsernames.add(username);
}
// ... along with orgs accessible from the token auth
(await octokit.paginate(octokit.rest.orgs.listForAuthenticatedUser))
?.map(org => org.login)
?.forEach(orgName => allAccessibleTokenOrgs.add(orgName));
}
}
}

return locationUrls.filter(loc => {
if (!locationGitOwnerMap.has(loc)) {
return false;
}
const owner = locationGitOwnerMap.get(loc)!;
return (
allAccessibleAppOrgs.has(owner) ||
allAccessibleTokenOrgs.has(owner) ||
allAccessibleUsernames.has(owner)
);
});
}

/**
* Adds the repositories accessible by the provided github app to the provided repositories Map<string, GithubRepository>
* If any errors occurs, adds them to the provided errors Map<number, GithubFetchError>
Expand Down
Loading

0 comments on commit 170ec74

Please sign in to comment.