From f028748c394c300ca45a01bfb990fbbbfcf6ca3a Mon Sep 17 00:00:00 2001 From: double beep <38133098+double-beep@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:48:20 +0000 Subject: [PATCH] fix: `bad_keywords.txt` apparently contains some blacklisted websites --- src/domain_stats.ts | 17 ++++++++++++----- src/github.ts | 3 ++- test/index.spec.ts | 17 +++++++++++++++-- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/domain_stats.ts b/src/domain_stats.ts index 739ba66..b7dd7a7 100644 --- a/src/domain_stats.ts +++ b/src/domain_stats.ts @@ -35,25 +35,32 @@ export class Domains { // Thanks tripleee! // https://github.com/Charcoal-SE/halflife/blob/ab0fa5fc2a048b9e17762ceb6e3472e4d9c65317/halflife.py#L77 const [ - watchedCall, blacklistedCall, prsCall, whitelistedCall, redirectorsCall + watchedCall, blacklistedCall, prsCall, whitelistedCall, redirectorsCall, badCall ] = await Promise.all(([ fetch(githubUrls.watched), fetch(githubUrls.blacklisted), fetch(githubUrls.api), fetch(githubUrls.whitelisted), - fetch(githubUrls.redirectors) + fetch(githubUrls.redirectors), + fetch(githubUrls.bad) ])); - const [watched, blacklisted, prs, whitelisted, redirectors] = await Promise.all([ + const [ + watched, blacklisted, prs, whitelisted, redirectors, bad + ] = await Promise.all([ watchedCall.text(), blacklistedCall.text(), prsCall.json() as Promise, whitelistedCall.text(), - redirectorsCall.text() + redirectorsCall.text(), + badCall.text() ]); + const badRegexes = getRegexesFromTxtFile(blacklisted, 0); + const blacklistedRegexes = getRegexesFromTxtFile(bad, 0); + this.watched = getRegexesFromTxtFile(watched, 2); - this.blacklisted = getRegexesFromTxtFile(blacklisted, 0); + this.blacklisted = badRegexes.concat(blacklistedRegexes); this.pullRequests = parseApiResponse(prs); this.whitelisted = whitelisted.split('\n'); diff --git a/src/github.ts b/src/github.ts index fbc3835..0534636 100644 --- a/src/github.ts +++ b/src/github.ts @@ -22,7 +22,8 @@ export const githubUrls = { whitelisted: 'https://raw.githubusercontent.com/userscripters/fire-extra-functionality/master/ini/whitelisted_domains.txt', redirectors: 'https://raw.githubusercontent.com/userscripters/fire-extra-functionality/master/ini/redirectors.txt', watched: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/watched_keywords.txt', - blacklisted: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/blacklisted_websites.txt' + blacklisted: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/blacklisted_websites.txt', + bad: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/bad_keywords.txt' }; function makeRegexESCompatible(keyword: string): RegExp[] { diff --git a/test/index.spec.ts b/test/index.spec.ts index 4087eea..84e0e43 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -76,8 +76,21 @@ describe('index helpers', () => { validWatches.forEach(keyword => expect(isWatched(keyword)).to.be.true); invalidWatches.forEach(keyword => expect(isWatched(keyword)).to.be.false); - const validBlacklists = ['powerigfaustralia', 'ewebtonic.in', 'healthcaresup', 'd680adc632091138ed9fd09659e15dc9']; - const invalidBlacklists = invalidWatches; + const validBlacklists = [ + // blacklisted websites + 'powerigfaustralia', + 'ewebtonic.in', + 'healthcaresup', + 'd680adc632091138ed9fd09659e15dc9', + + // bad keywords + 'orvigomax', + 'opstree.com' + ]; + const invalidBlacklists = [ + ...invalidWatches, + 'blog.opstree.com' // test negative lookbehind + ]; validBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.true); invalidBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.false);