Skip to content

Commit

Permalink
fix: correct metasmoke search URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
double-beep authored Sep 4, 2024
1 parent 552774d commit da4636d
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 20 deletions.
5 changes: 3 additions & 2 deletions src/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ function updateKeywordLists(
break;
default:
}
} catch (error) {
console.error('An error occurred', error);
} catch {
// eslint-disable-next-line no-useless-return
return;
}
}

Expand Down
51 changes: 45 additions & 6 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,56 @@ const metasmokeSearchUrl = 'https://metasmoke.erwaysoftware.com/search';

// export in an object for the tests
export const helpers = {
generateSearchRegex: (text: string): string => {
    // Builds the regex string used for metasmoke searches.
    // Based on https://chat.stackexchange.com/transcript/message/55327802
    // (slightly modified to improve readability)

    // any quantifier, grouping or alternation character
    const specialCharacters = /[+?*{}()|]/;
    // a leading run of word characters, or a leading (?-i:...) group optionally
    // followed by word characters, neither of them followed by a quantifier
    const leadingLiteral = /^(\w+(?![?*+{])|\(\?-i:[^+?*{}()|]+\)\w*(?![?*+{]))/;

    // turn (?:...) into plain groups and unwrap the first (?-i:...) group,
    // so that only "real" special characters are left to be detected
    const simplified = text
        .replace(/\(\?:/g, '(')
        .replace(/\(\?-i:([^()]+)\)/, '$1');

    // no special characters at all:
    // match the term first, then verify the boundary with a lookbehind
    if (!specialCharacters.test(simplified)) {
        return `(?s)${text}(?<=(?:^|\\b)${text})(?:\\b|$)`;
    }

    // the term starts with a literal part:
    // apply the lookbehind to that leading part only
    if (leadingLiteral.test(text)) {
        const withLookbehind = text.replace(
            leadingLiteral,
            '$1(?<=(?:^|\\b)$1)'
        );

        return `(?s)${withLookbehind}(?:\\b|$)`;
    }

    // fallback: plain boundary check before and after the term
    return `(?s)(?:^|\\b)${text}(?:\\b|$)`;
},

// should be the same as "See the MS search here" text in PRs
getMetasmokeSearchUrl: (term: string): string => {
const searchTerm = term.includes('.') // it's a domain
const text = term.includes('.') // it's a domain
? term
: helpers.getRegexForPathShortener(term);

const bodyParam = `(?s:\\b${searchTerm}\\b)`;
const parameters = `?utf8=✓&body_is_regex=1&body=${bodyParam}`;
const fullUrl = metasmokeSearchUrl + parameters;
const unescaped = term.replace(/\\./g, '.');
const searchTerm = helpers.isBlacklisted(unescaped)
? `(?i)${text}`
: helpers.generateSearchRegex(text);

const url = new URL(metasmokeSearchUrl);
url.searchParams.set('utf8', '✓');
// use OR instead of default AND
url.searchParams.set('or_search', '1');

url.searchParams.set('title_is_regex', '1');
url.searchParams.set('body_is_regex', '1');
url.searchParams.set('username_is_regex', '1');

url.searchParams.set('title', searchTerm);
url.searchParams.set('body', searchTerm);
url.searchParams.set('username', searchTerm);

return encodeURI(fullUrl);
return url.toString();
},

// Follow https://charcoal-se.org/smokey/Guidance-for-Blacklisting-and-Watching:
Expand Down Expand Up @@ -113,7 +152,7 @@ export const helpers = {
getRegexForPathShortener: (path: string, domain?: string): string => {
// https://stackoverflow.com/a/3561711
// https://chat.stackexchange.com/transcript/message/65665204
const escaped = path.replace(/[\\^$*?.()|[\]{}]/g, '\\$&');
const escaped = path.replace(/[+\\^$*?.()|[\]{}]/g, '\\$&');
const mainPart = `(?-i:${escaped})`;
const comment = `(?#${domain || ''})`;

Expand Down
15 changes: 9 additions & 6 deletions src/metasmoke.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,14 @@ function getPostCounts(parsedHtml: Document): number[] {
}

export function getMsSearchResults(term: string): Promise<number[]> {
const encoded = encodeURIComponent(term);
const url = new URL('https://metasmoke.erwaysoftware.com/search');
url.searchParams.set('utf8', '✓');
url.searchParams.set('body', term);

return new Promise((resolve, reject) => {
GM_xmlhttpRequest({
method: 'GET',
url: `https://metasmoke.erwaysoftware.com/search?utf8=✓&body=${encoded}`,
url: url.toString(),
onload: response => {
const { status, responseText } = response;

Expand All @@ -128,11 +130,12 @@ export function getMsSearchResults(term: string): Promise<number[]> {
}

export async function getAllDomainsFromPost(metasmokePostId: number): Promise<DomainsForPostIdItems[]> {
const method = `${metasmokePostId}/domains`;
const parameters = `?key=${metasmokeApiKey}&filter=${postDomainsApiFilter}&per_page=100`;
const msApiUrl = metasmokeApiBase + method + parameters;
const url = new URL(`${metasmokeApiBase}${metasmokePostId}/domains`);
url.searchParams.set('key', metasmokeApiKey);
url.searchParams.set('filter', postDomainsApiFilter);
url.searchParams.set('per_page', '100');

const apiCallResponse = await fetch(msApiUrl);
const apiCallResponse = await fetch(url.toString());
const jsonResponse = await apiCallResponse.json() as DomainsForPostIdResponse;

return jsonResponse.items;
Expand Down
35 changes: 29 additions & 6 deletions test/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,36 @@ describe('index helpers', () => {
// test the whitelisted domains and the redirectors which are all valid domains
[...Domains.whitelisted, ...Domains.redirectors]
.filter(domain => domain.includes('.')) // exclude exception
.map(domain => domain.replace(/\./g, '\\.'))
.forEach(domainName => {
const msSearchUrl = helpers.getMetasmokeSearchUrl(domainName);
const urlObject = new URL(msSearchUrl);
const body = urlObject.searchParams.get('body');

expect(body).to.be.equal(`(?s:\\b${domainName}\\b)`);
const url = new URL(msSearchUrl);

const title = url.searchParams.get('title');
const body = url.searchParams.get('body');
const username = url.searchParams.get('username');

expect(body)
.to.be.equal(title)
.to.be.equal(username)
.to.be.equal(
helpers.isBlacklisted(
// unescape
domainName.replace(/\\./g, '.')
)
? `(?i)${domainName}`
: String.raw`(?s)${domainName}(?<=(?:^|\b)${domainName})(?:\b|$)`
);

const or = url.searchParams.get('or_search');
expect(or).to.equal('1');
});

const searchUrl = helpers.getMetasmokeSearchUrl('speakatoo\\.com');
const url = new URL(searchUrl);
const body = url.searchParams.get('body');

expect(body).to.be.equal(`(?i)speakatoo\\.com`);
});

it('should figure out if a domain is caught or not', () => {
Expand Down Expand Up @@ -112,8 +135,8 @@ describe('index helpers', () => {
'3vcWir3': ['bit.ly', '(?-i:3vcWir3)(?#bit.ly)'],
'FNEuyd': ['goo.gl', '(?-i:FNEuyd)(?#goo.gl)'],
'KdxEAt91D7k': ['youtu.be', '(?-i:KdxEAt91D7k)(?#youtu.be)'],
// don't escape +
'+jJyLwSpqLeAzNmFi': ['t.me', String.raw`(?-i:+jJyLwSpqLeAzNmFi)(?#t.me)`],
// escape +
'+jJyLwSpqLeAzNmFi': ['t.me', String.raw`(?-i:\+jJyLwSpqLeAzNmFi)(?#t.me)`],
// don't escape /
'davitacols/dataDisk': ['github repository', String.raw`(?-i:davitacols/dataDisk)(?#github repository)`],
'arjun.muralidharan2': ['facebook', String.raw`(?-i:arjun\.muralidharan2)(?#facebook)`],
Expand Down

0 comments on commit da4636d

Please sign in to comment.