Skip to content

Commit

Permalink
fix: compile regexes with the s flag, surround watched keywords with `\b`
Browse files Browse the repository at this point in the history
  • Loading branch information
double-beep authored Jul 12, 2024
1 parent 33f7b4c commit 0e63d24
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 41 deletions.
29 changes: 20 additions & 9 deletions dist/fire_extra.user.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,19 @@
const shortenerPathRegex = /\(\?-i:(\w+)\)\(\?#[a-zA-Z.]+\)/;
const urlPath = keyword.match(shortenerPathRegex)?.[1];
if (!urlPath) return [];
else return [new RegExp(urlPath)];
else return [new RegExp(urlPath, "s")];
}
function getRegexesFromTxtFile(fileContent, position) {
return fileContent.split("\n").flatMap((line) => {
const keyword = line.split(" ")[position];
if (!keyword) return [];
let regexToReturn;
try {
regexToReturn = new RegExp(keyword, "i");
regexToReturn = new RegExp(
// https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands#non--number-blacklists-and-watchlist
position === 2 ? `\\b${keyword}\\b` : keyword,
"is"
);
} catch (error) {
return makeRegexESCompatible(keyword);
}
Expand Down Expand Up @@ -256,12 +260,14 @@
}
function updateKeywordLists(regex, action) {
try {
const newRegex = new RegExp(regex, "i");
const compare = (regex2) => regex2.source !== newRegex.source;
const newRegex = new RegExp(regex, "is");
const compare = (regex2) => regex2.source !== newRegex.source && regex2.source !== `\\b${newRegex.source}\\b`;
switch (action) {
case "watch":
Domains.watched.push(newRegex);
case "watch": {
const modified = new RegExp(`\\b${newRegex.source}\\b`, "si");
Domains.watched.push(modified);
break;
}
case "blacklist":
Domains.watched = Domains.watched.filter(compare);
Domains.blacklisted.push(newRegex);
Expand Down Expand Up @@ -475,7 +481,12 @@
return tpCount >= 5 && fpCount + naaCount === 0 && Number(seHits) < 5;
},
// given a list type ('watch' or 'blacklist') and a domain, find if the domain is matched by any regex in the corresponding Domains list
isCaught: (regexes, domain) => regexes.some((regex) => regex.test(domain)),
isCaught: (type, domain) => {
const regexes = Domains[`${type}ed`];
return regexes.some((regex) => regex.test(domain));
},
isWatched: (domain) => helpers.isCaught("watch", domain),
isBlacklisted: (domain) => helpers.isCaught("blacklist", domain),
// get the id the domain li has - dots are replaced with dash
getDomainId: (domainName) => `fire-extra-${domainName.replace(/\./g, "-")}`,
// helper to pluralise strings
Expand Down Expand Up @@ -510,8 +521,8 @@
const domainLi = document.getElementById(domainId);
const domainName = term.includes(".") ? "" : domainLi?.parentElement?.parentElement?.firstChild?.textContent;
if (!seResultCount || !metasmokeStats?.length) return;
const isWatched = helpers.isCaught(Domains.watched, term);
const isBlacklisted = helpers.isCaught(Domains.blacklisted, term);
const isWatched = helpers.isWatched(term);
const isBlacklisted = helpers.isBlacklisted(term);
const qualifiesForWatch = helpers.qualifiesForWatch(metasmokeStats, seResultCount);
const qualifiesForBlacklist = helpers.qualifiesForBlacklist(metasmokeStats, seResultCount);
const watch = {
Expand Down
11 changes: 7 additions & 4 deletions src/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,18 @@ function updateKeywordLists(
action: 'watch' | 'unwatch' | 'blacklist' | 'unblacklist'
): void {
try {
const newRegex = new RegExp(regex, 'i');
const newRegex = new RegExp(regex, 'is');

const compare = (regex: RegExp): boolean => regex.source !== newRegex.source;
const compare = (regex: RegExp): boolean =>
regex.source !== newRegex.source && regex.source !== `\\b${newRegex.source}\\b`;

switch (action) {
case 'watch':
Domains.watched.push(newRegex);
case 'watch': {
const modified = new RegExp(`\\b${newRegex.source}\\b`, 'si');
Domains.watched.push(modified);

break;
}
case 'blacklist':
// if it is a blacklist, also remove the item from the watchlist
Domains.watched = Domains.watched.filter(compare);
Expand Down
9 changes: 7 additions & 2 deletions src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ function makeRegexESCompatible(keyword: string): RegExp[] {

const urlPath = keyword.match(shortenerPathRegex)?.[1];
if (!urlPath) return [];
else return [new RegExp(urlPath)];
else return [new RegExp(urlPath, 's')];
}

export function getRegexesFromTxtFile(fileContent: string, position: number): RegExp[] {
Expand All @@ -40,7 +40,12 @@ export function getRegexesFromTxtFile(fileContent: string, position: number): Re

let regexToReturn;
try {
regexToReturn = new RegExp(keyword, 'i');
regexToReturn = new RegExp(
// https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands#non--number-blacklists-and-watchlist

position === 2 ? `\\b${keyword}\\b` : keyword,
'is'
);
} catch (error) {
// regex is incompatible with the ES regex engine
// for (?-i:abcdefg)(?#bit.ly) regexes
Expand Down
14 changes: 11 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ export const helpers = {
},

// given a list type ('watch' or 'blacklist') and a domain, find if the domain is matched by any regex in the corresponding Domains list
isCaught: (regexes: RegExp[], domain: string): boolean => regexes.some(regex => regex.test(domain)),
isCaught: (type: 'watch' | 'blacklist', domain: string): boolean => {
const regexes = Domains[`${type}ed`];

return regexes.some(regex => regex.test(domain));
},

isWatched: (domain: string): boolean => helpers.isCaught('watch', domain),

isBlacklisted: (domain: string): boolean => helpers.isCaught('blacklist', domain),

// get the id the domain li has - dots are replaced with dash
getDomainId: (domainName: string): string => `fire-extra-${domainName.replace(/\./g, '-')}`,
Expand Down Expand Up @@ -127,8 +135,8 @@ function updateEmojisInformation(term: string): void {

if (!seResultCount || !metasmokeStats?.length) return;

const isWatched = helpers.isCaught(Domains.watched, term);
const isBlacklisted = helpers.isCaught(Domains.blacklisted, term);
const isWatched = helpers.isWatched(term);
const isBlacklisted = helpers.isBlacklisted(term);

const qualifiesForWatch = helpers.qualifiesForWatch(metasmokeStats, seResultCount);
const qualifiesForBlacklist = helpers.qualifiesForBlacklist(metasmokeStats, seResultCount);
Expand Down
31 changes: 15 additions & 16 deletions test/chat.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,27 @@ describe('chat helpers', function() {
);
});

const { watched, blacklisted } = Domains;
const { isCaught } = helpers;
const { isWatched, isBlacklisted } = helpers;

// random-domain.com was first watched, then unwatched and shouldn't be in the watchlist
expect(isCaught(watched, 'random-domain.com')).to.be.false;
expect(isCaught(blacklisted, 'random-random-domain.com')).to.be.true;
expect(isCaught(blacklisted, 'tenderpublish')).to.be.false; // was unblacklisted
expect(isCaught(watched, 'domain.with.a.few.dots.com')).to.be.true;
expect(isCaught(blacklisted, 'domain.with.many.many.dots.com')).to.be.true;
expect(isWatched('random-domain.com')).to.be.false;
expect(isBlacklisted('random-random-domain.com')).to.be.true;
expect(isBlacklisted('tenderpublish')).to.be.false; // was unblacklisted
expect(isWatched('domain.with.a.few.dots.com')).to.be.true;
expect(isBlacklisted('domain.with.many.many.dots.com')).to.be.true;

// nayvi was blacklisted, therefore it shouldn't be in the watchlist, but in the blacklist
expect(isCaught(watched, 'nayvi')).to.be.false;
expect(isCaught(blacklisted, 'nayvi')).to.be.true;
expect(isCaught(blacklisted, 'naYvi')).to.be.true;
expect(isWatched('nayvi')).to.be.false;
expect(isBlacklisted('nayvi')).to.be.true;
expect(isBlacklisted('naYvi')).to.be.true;

// a user id other than SD's one shouldn't change the watchlist or the blacklist
const random = new JSDOM(getRandomMessage(chatMessage, 'watch', 'example\\.com')).window.document;

newChatEventOccurred({ event_type: 1, user_id: 123456, content: random }); // not Smokey's id
newChatEventOccurred({ event_type: 12, user_id: 120914, content: random }); // not interested in that event type

expect(isCaught(watched, 'example.com')).to.be.false;
expect(isWatched('example.com')).to.be.false;
});

it('should update keyword lists once a pull request is merged', async () => {
Expand All @@ -105,22 +104,22 @@ describe('chat helpers', function() {
}
];

const { isCaught } = helpers;
const { isWatched, isBlacklisted } = helpers;

// Merge pull request #12085
expect(isCaught(Domains.blacklisted, 'spam.com')).to.be.false;
expect(isBlacklisted('spam.com')).to.be.false;
const merge = await getMessage(65938518);
newChatEventOccurred(
{ event_type: 1, user_id: 120914, content: new JSDOM(merge).window.document }
);
expect(isCaught(Domains.blacklisted, 'spam.com')).to.be.true;
expect(isBlacklisted('spam.com')).to.be.true;

// Closed pull request #12080.
const close = await getMessage(65937100);
newChatEventOccurred(
{ event_type: 1, user_id: 120914, content: new JSDOM(close).window.document }
);
expect(isCaught(Domains.watched, 'example.com')).to.be.false;
expect(isCaught(Domains.blacklisted, 'example.com')).to.be.false;
expect(isWatched('example.com')).to.be.false;
expect(isBlacklisted('example.com')).to.be.false;
});
});
16 changes: 13 additions & 3 deletions test/github.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
parseApiResponse
} from '../src/github';
import jsdom from "jsdom";
import { Domains } from '../src/domain_stats';

const { JSDOM } = jsdom;

Expand Down Expand Up @@ -80,11 +81,20 @@ describe('github helpers', () => {
.replace(/\\/mg, '')
.split('\n');

expect(allParsed.every(item => item instanceof RegExp)); // make sure they're all regexes
expect(allParsed.every(item => item instanceof RegExp)).to.be.true; // make sure they're all regexes

const oldWatched = Domains.watched;
const oldBlacklisted = Domains.blacklisted;

Domains.watched = watchedParsed;
Domains.blacklisted = blacklistedParsed;

// the array should contain the right regexes
expect(watches.every(keyword => helpers.isCaught(watchedParsed, keyword))).to.be.true;
expect(blacklists.every(keyword => helpers.isCaught(blacklistedParsed, keyword))).to.be.true;
expect(watches.every(keyword => helpers.isWatched(keyword))).to.be.true;
expect(blacklists.every(keyword => helpers.isBlacklisted(keyword))).to.be.true;

Domains.watched = oldWatched;
Domains.blacklisted = oldBlacklisted;
});

it('should correctly parse a sample GH API response', () => {
Expand Down
15 changes: 11 additions & 4 deletions test/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,7 @@ describe('index helpers', () => {
});

it('should figure out if a domain is caught or not', () => {
const { watched, blacklisted } = Domains;

const isWatched = (keyword: string): boolean => helpers.isCaught(watched, keyword);
const isBlacklisted = (keyword: string): boolean => helpers.isCaught(blacklisted, keyword);
const { isWatched, isBlacklisted } = helpers;

const validWatches = ['essayssos.com', 'trimfire', 'erozon', 'saleleads.net', 'SaleLeads.net'];
const invalidWatches = ['non-existent-keyword', 'google.com'];
Expand All @@ -61,6 +58,16 @@ describe('index helpers', () => {

validBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.true);
invalidBlacklists.forEach(keyword => expect(isBlacklisted(keyword)).to.be.false);

// https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands#non--number-blacklists-and-watchlist
const partialW = ['randessayssos.com.com', 'atrimfire'];
partialW.forEach(keyword => expect(isWatched(keyword)).to.be.false);

const notPartialW = ['!erozon', '.SaleLeads.net', '.ESSAYssos.com'];
notPartialW.forEach(keyword => expect(isWatched(keyword)).to.be.true);

const partialB = ['testpowerigfaustralia', '!healthcaresup', '@ewebtonic.in'];
partialB.forEach(keyword => expect(isBlacklisted(keyword)).to.be.true);
});

it('should correctly pluralise words', () => {
Expand Down

0 comments on commit 0e63d24

Please sign in to comment.