Skip to content

Commit

Permalink
fix: bad_keywords.txt apparently contains some blacklisted websites
Browse files: browse the repository at this point in the history
  • Loading branch information
double-beep committed Sep 12, 2024
1 parent 608748b commit f028748
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 8 deletions.
17 changes: 12 additions & 5 deletions src/domain_stats.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,25 +35,32 @@ export class Domains {
// Thanks tripleee!
// https://github.com/Charcoal-SE/halflife/blob/ab0fa5fc2a048b9e17762ceb6e3472e4d9c65317/halflife.py#L77
const [
watchedCall, blacklistedCall, prsCall, whitelistedCall, redirectorsCall
watchedCall, blacklistedCall, prsCall, whitelistedCall, redirectorsCall, badCall
] = await Promise.all(([
fetch(githubUrls.watched),
fetch(githubUrls.blacklisted),
fetch(githubUrls.api),
fetch(githubUrls.whitelisted),
fetch(githubUrls.redirectors)
fetch(githubUrls.redirectors),
fetch(githubUrls.bad)
]));

const [watched, blacklisted, prs, whitelisted, redirectors] = await Promise.all([
const [
watched, blacklisted, prs, whitelisted, redirectors, bad
] = await Promise.all([
watchedCall.text(),
blacklistedCall.text(),
prsCall.json() as Promise<GithubApiResponse[]>,
whitelistedCall.text(),
redirectorsCall.text()
redirectorsCall.text(),
badCall.text()
]);

const badRegexes = getRegexesFromTxtFile(blacklisted, 0);
const blacklistedRegexes = getRegexesFromTxtFile(bad, 0);

this.watched = getRegexesFromTxtFile(watched, 2);
this.blacklisted = getRegexesFromTxtFile(blacklisted, 0);
this.blacklisted = badRegexes.concat(blacklistedRegexes);
this.pullRequests = parseApiResponse(prs);

this.whitelisted = whitelisted.split('\n');
Expand Down
3 changes: 2 additions & 1 deletion src/github.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,7 +22,8 @@ export const githubUrls = {
whitelisted: 'https://raw.githubusercontent.com/userscripters/fire-extra-functionality/master/ini/whitelisted_domains.txt',
redirectors: 'https://raw.githubusercontent.com/userscripters/fire-extra-functionality/master/ini/redirectors.txt',
watched: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/watched_keywords.txt',
blacklisted: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/blacklisted_websites.txt'
blacklisted: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/blacklisted_websites.txt',
bad: 'https://raw.githubusercontent.com/Charcoal-SE/SmokeDetector/master/bad_keywords.txt'
};

function makeRegexESCompatible(keyword: string): RegExp[] {
Expand Down
17 changes: 15 additions & 2 deletions test/index.spec.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,8 +76,21 @@ describe('index helpers', () => {
validWatches.forEach(keyword => expect(isWatched(keyword)).to.be.true);
invalidWatches.forEach(keyword => expect(isWatched(keyword)).to.be.false);

const validBlacklists = ['powerigfaustralia', 'ewebtonic.in', 'healthcaresup', 'd680adc632091138ed9fd09659e15dc9'];
const invalidBlacklists = invalidWatches;
const validBlacklists = [
// blacklisted websites
'powerigfaustralia',
'ewebtonic.in',
'healthcaresup',
'd680adc632091138ed9fd09659e15dc9',

// bad keywords
'orvigomax',
'opstree.com'
];
const invalidBlacklists = [
...invalidWatches,
'blog.opstree.com' // test negative lookbehind
];

validBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.true);
invalidBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.false);
Expand Down

0 comments on commit f028748

Please sign in to comment.