From a31cf814d65ac3d328ac97723a5ae74059ac1a65 Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sun, 14 May 2023 19:41:32 -0400 Subject: [PATCH] feat(adblock): add brave adblock support --- README.md | 19 ++-- kayle/README.md | 5 +- kayle/lib/auto.ts | 85 ++++++++++++++++ kayle/lib/config.ts | 12 +++ kayle/lib/data/README.md | 5 + kayle/lib/data/list.ts | 2 + kayle/lib/index.ts | 12 +-- kayle/lib/kayle.ts | 176 +++++++++------------------------- kayle/lib/utils/adblock.ts | 76 +++++++++++++++ kayle/lib/utils/go-to-page.ts | 14 ++- kayle/package.json | 4 +- 11 files changed, 251 insertions(+), 159 deletions(-) create mode 100644 kayle/lib/auto.ts create mode 100644 kayle/lib/data/README.md create mode 100644 kayle/lib/data/list.ts create mode 100644 kayle/lib/utils/adblock.ts diff --git a/README.md b/README.md index b113a81c..45d5d146 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,19 @@ -Incredibly fast and precise web accessibility engine. +Incredibly fast and precise web accessibility engine with 0 dependencies. ```sh npm install kayle --save ``` -Playwright 🎭 or Puppeteer 🤖 - ```ts import { kayle } from "kayle"; +// Playwright 🎭 or Puppeteer 🤖 const page = await browser.newPage(); -const results = await kayle({ page, browser, origin: "https://mywebsite.com" }); +const results = await kayle({ page, browser, origin: "https://a11ywatch.com" }); ``` If you need to run a full site-wide crawl import `autoKayle`. @@ -36,9 +35,9 @@ const results = await autoKayle({ runners: ["htmlcs", "axe"], includeWarnings: true, origin: "https://a11ywatch.com", - cb: function callback(result) { - console.log(result) - } + cb: function callback(result) { + console.log(result); + }, // store: `${process.cwd()}/_data/`, // _data folder must exist first }); @@ -159,11 +158,9 @@ the best aspects of testing without worrying about a `name`. 1. zh-CN ("Chinese-Simplified") 1. zh-TW ("Chinese-Traditional") -## Performance - -This project is the fastest web accessibility runner OSS. 
The `htmlcs` and `axe-core` handling of the runners runs faster due to bug fixes and improved optimizations. This library optimizes the scripts to take advtage of v8 and pre-compiles locales in separate scripts for blazing fast speeds. +## Features -- Playwright runs 100% faster than puppeteer. Most of it is due to more fine grain control of events, ws connections, and timers. +You can enable high-performance ad blocking, powered by Brave's adblock engine, by running `npm i adblock-rs` in your project. The module is not installed by default, and the environment variable `KAYLE_ADBLOCK` must also be set to `true`. ## Testing diff --git a/kayle/README.md b/kayle/README.md index ca30df71..f0122d82 100644 --- a/kayle/README.md +++ b/kayle/README.md @@ -86,14 +86,13 @@ kayle supports multiple test runners which return different results. The built-i - `htmlcs` (default): run tests using [HTML CodeSniffer](./lib/runners/htmlcs.ts) - `custom`: custom runners. -## Benchmarks - -## playwright +## Playwright/Puppeteer `Fast_htmlcs`: expect runs to finish between 10-40ms with static html and around 30-90ms without. `Fast_axecore`: expect runs to finish between 40-350ms with static html and around 30-90ms without. We are working on making fast_axecore fast so it can run relatively near htmlcs. +If you are using Puppeteer, expect results to be around 2x slower. ## Utils diff --git a/kayle/lib/auto.ts b/kayle/lib/auto.ts new file mode 100644 index 00000000..f4897681 --- /dev/null +++ b/kayle/lib/auto.ts @@ -0,0 +1,85 @@ +import { _log } from "./config"; +import { Audit, kayle, RunnerConf } from "./kayle"; + +let write; +let extractLinks; + +// on autoKayle link find callback +declare function callback(audit: Audit): Audit; +declare function callback(audit: Audit): Promise; + +/** + * Run accessibility tests for page auto running until all pages complete. + * @param {Object} [config={}] config - Options to change the way tests run. + * @returns {Promise} Returns a promise which resolves with array of results.
+ */ +export async function autoKayle( + o: RunnerConf & { log?: boolean; store?: string; cb?: typeof callback } = {}, + ignoreSet?: Set<string>, + _results?: Audit[] +): Promise<Audit[]> { + if (!write) { + const { writeFile } = await import("fs/promises"); + write = writeFile; + } + // pre init list + if (!_results) { + _results = []; + } + + const result = await kayle(o, true); + + _results.push(result); + + if (o.cb && typeof o.cb === "function") { + await o.cb(result); + } + + // auto run links until finished. + if (!extractLinks) { + extractLinks = (await import("./wasm/extract")).extractLinks; + } + + if (!ignoreSet) { + ignoreSet = new Set<string>(); + } + + const links: string[] = await extractLinks(o); + + // persist html file to disk + if (o.store) { + await write( + `${o.store}/${encodeURIComponent(o.page.url())}`, + await o.page.content() + ); + } + + await o.page.close(); + + await Promise.all( + links.map(async (link) => { + if (ignoreSet.has(link)) { + return await Promise.resolve(); + } + + if (_log.enabled) { + console.log(`Running: ${link}`); + } + + ignoreSet.add(link); + + return await autoKayle( + { + ...o, + page: await o.browser.newPage(), + html: null, + origin: link, + }, + ignoreSet, + _results + ); + }) + ); + + return _results; +} diff --git a/kayle/lib/config.ts b/kayle/lib/config.ts index 34c54ef8..d8cbad23 100644 --- a/kayle/lib/config.ts +++ b/kayle/lib/config.ts @@ -114,3 +114,15 @@ export type RunnerConfig = { // prevent auto intercept request to prevent fetching resources. noIntercept?: boolean; }; + +// log singleton +export const _log = { enabled: false }; + +/** + * Enable or disable logging. + * @param {Boolean} [enabled] enabled - Enable console logging. + * @returns {void} Returns void.
+ */ +export function setLogging(enabled?: boolean): void { + _log.enabled = enabled; +} diff --git a/kayle/lib/data/README.md b/kayle/lib/data/README.md new file mode 100644 index 00000000..98e932cd --- /dev/null +++ b/kayle/lib/data/README.md @@ -0,0 +1,5 @@ +# data + +Things that help make kayle more efficient. + +1. [adblock](https://github.com/brave/adblock-rust) - Enabled by using the `process.env.KAYLE_ADBLOCK=true` or setting the env variable `KAYLE_ADBLOCK` to true. In order to use adblock you also need to run `npm i adblock-rs` to install the wasm module locally. diff --git a/kayle/lib/data/list.ts b/kayle/lib/data/list.ts new file mode 100644 index 00000000..4c22a442 --- /dev/null +++ b/kayle/lib/data/list.ts @@ -0,0 +1,2 @@ +// get a list of valid things that help with data +export const adblock = Symbol("adblock engine to ignore resources"); diff --git a/kayle/lib/index.ts b/kayle/lib/index.ts index 94e52d69..d217ac21 100644 --- a/kayle/lib/index.ts +++ b/kayle/lib/index.ts @@ -1,15 +1,9 @@ -export { - kayle, - setLogging, - autoKayle, - Issue, - Audit, - MetaInfo, - Automatable, -} from "./kayle"; +export { kayle, Issue, Audit, MetaInfo, Automatable } from "./kayle"; +export { autoKayle } from "./auto"; export { runnersJavascript } from "./runner-js"; export { goToPage, setNetworkInterception, networkBlock, } from "./utils/go-to-page"; +export { setLogging } from "./config"; diff --git a/kayle/lib/kayle.ts b/kayle/lib/kayle.ts index 34c5c16d..b228517a 100644 --- a/kayle/lib/kayle.ts +++ b/kayle/lib/kayle.ts @@ -1,10 +1,9 @@ import { extractArgs } from "./option"; import { runAction } from "./action"; -import { RunnerConfig } from "./config"; +import { RunnerConfig, _log } from "./config"; import { runnersJavascript, getRunner } from "./runner-js"; import { goToPage, setNetworkInterception } from "./utils/go-to-page"; import { Watcher } from "./watcher"; -import { writeFile } from "fs/promises"; export type MetaInfo = { errorCount: number; @@ -41,137 
+40,7 @@ export type Audit = { pageUrl: string; }; -type RunnerConf = Partial; - -let _log = false; - -/** - * Enable or disable logging. - * @param {Object} [enabled] enabled - Enable console logging. - * @returns {void} Returns void. - */ -export function setLogging(enabled?: boolean): void { - _log = enabled; -} - -/** - * Run accessibility tests for page. - * @param {Object} [config={}] config - Options to change the way tests run. - * @param {Boolean} [preventClose=false] preventClose - Prevent page page from closing on finish. - * @returns {Promise} Returns a promise which resolves with results. - */ -export async function kayle( - o: RunnerConf = {}, - preventClose?: boolean -): Promise { - const navigate = - typeof o.page.url === "function" && - o.page.url() === "about:blank" && - (o.origin || o.html); - - // navigate to a clean page - if (navigate) { - await goToPage( - { page: o.page, html: o.html, timeout: o.timeout }, - o.origin - ); - } else if (!o.noIntercept) { - await setNetworkInterception(o.page); - } - - const config = extractArgs(o); - - const watcher = new Watcher(); - - const results = await Promise.race([ - watcher.watch(config.timeout), - auditPage(config), - ]); - - clearTimeout(watcher.timer); - - !preventClose && navigate && (await o.page.close()); - - return results; -} - -let extractLinks; - -// on autoKayle link find callback -declare function callback(audit: Audit): Audit; -declare function callback(audit: Audit): Promise; - -/** - * Run accessibility tests for page auto running until all pages complete. - * @param {Object} [config={}] config - Options to change the way tests run. - * @returns {Promise} Returns a promise which resolves with array of results. 
- */ -export async function autoKayle( - o: RunnerConf & { log?: boolean; store?: string; cb?: typeof callback } = {}, - ignoreSet?: Set, - _results?: Audit[] -): Promise { - // pre init list - if (!_results) { - _results = []; - } - - const result = await kayle(o, true); - - _results.push(result); - - if (o.cb && typeof o.cb === "function") { - await o.cb(result); - } - - // auto run links until finished. - if (!extractLinks) { - extractLinks = (await import("./wasm/extract")).extractLinks; - } - - if (!ignoreSet) { - ignoreSet = new Set(); - } - - const links: string[] = await extractLinks(o); - - // persist html file to disk - if (o.store) { - await writeFile( - `${o.store}/${encodeURIComponent(o.page.url())}`, - await o.page.content() - ); - } - - await o.page.close(); - - await Promise.all( - links.map(async (link) => { - if (ignoreSet.has(link)) { - return await Promise.resolve(); - } - - if (_log) { - console.log(`Running: ${link}`); - } - - ignoreSet.add(link); - - return await autoKayle( - { - ...o, - page: await o.browser.newPage(), - html: null, - origin: link, - }, - ignoreSet, - _results - ); - }) - ); - - return _results; -} +export type RunnerConf = Partial; // run accessibility audit async function auditPage(config: RunnerConfig) { @@ -222,3 +91,44 @@ async function audit(config: RunnerConfig): Promise { } ); } + +/** + * Run accessibility tests for page. + * @param {Object} [config={}] config - Options to change the way tests run. + * @param {Boolean} [preventClose=false] preventClose - Prevent page page from closing on finish. + * @returns {Promise} Returns a promise which resolves with results. 
+ */ +export async function kayle( + o: RunnerConf = {}, + preventClose?: boolean +): Promise<Audit> { + const navigate = + typeof o.page.url === "function" && + o.page.url() === "about:blank" && + (o.origin || o.html); + + // navigate to a clean page + if (navigate) { + await goToPage( + { page: o.page, html: o.html, timeout: o.timeout }, + o.origin + ); + } else if (!o.noIntercept) { + await setNetworkInterception(o.page); + } + + const config = extractArgs(o); + + const watcher = new Watcher(); + + const results = await Promise.race([ + watcher.watch(config.timeout), + auditPage(config), + ]); + + clearTimeout(watcher.timer); + + !preventClose && navigate && (await o.page.close()); + + return results; +} diff --git a/kayle/lib/utils/adblock.ts b/kayle/lib/utils/adblock.ts new file mode 100644 index 00000000..41a2d099 --- /dev/null +++ b/kayle/lib/utils/adblock.ts @@ -0,0 +1,76 @@ +type AdCheck = { + check(url: string, domain: string, resource: string, bool: boolean); +}; + +const engine: unknown | AdCheck = + process.env.KAYLE_ADBLOCK === "true" + ? (async () => { + try { + // @ts-ignore module is not installed by default.
+ const adblockRust = await import("adblock-rs"); + const { promises } = await import("fs"); + const { join } = await import("path"); + + const filterSet = new adblockRust.FilterSet(false); + + // fetch list of resources (raw text, not the GitHub HTML view) and store inside data directory + const resourceList = [ + "https://raw.githubusercontent.com/brave/adblock-rust/master/data/brave/brave-unbreak.txt", + "https://raw.githubusercontent.com/brave/adblock-rust/master/data/brave/coin-miners.txt", + "https://raw.githubusercontent.com/brave/adblock-rust/master/data/uBlockOrigin/unbreak.txt", + ]; + + for (const adlist of resourceList) { + const u = new URL(adlist); + const filePath = u.pathname.split("/"); + const fileName = filePath[filePath.length - 1]; + const file = join(__dirname, "../data/", fileName); + + const fileExists = !!(await promises + .stat(file) + .catch((e) => false)); + + if (!fileExists) { + try { + const req = await fetch(adlist); + const adFilter = await req.text(); + + if (req.ok && adFilter) { + await promises.writeFile(file, adFilter); + filterSet.addFilters(adFilter.split("\n")); + } + } catch (e) { + console.error(e); + } + continue; // filters already added (or download failed): move on to the next list + } + const adFilter = await promises.readFile(file, { + encoding: "utf-8", + }); + + filterSet.addFilters(adFilter.split("\n")); + } + + const engine = new adblockRust.Engine(filterSet, true); + const serializedArrayBuffer = engine.serializeRaw(); + + console.log( + `Adblock Engine size: ${( + serializedArrayBuffer.byteLength / + 1024 / + 1024 + ).toFixed(2)} MB` + ); + + return engine; + } catch (_) { + // error for now without exiting since feature is opt in + console.error( + "Error: adblock-rs installation missing! Run `npm i adblock-rs` or `yarn add adblock-rs` to start the adblock engine."
+ ); + } + })() + : null; + +// pending adblock engine (null when disabled, undefined on load failure): await before use +export const adEngine = engine as Promise<AdCheck | undefined> | null; diff --git a/kayle/lib/utils/go-to-page.ts b/kayle/lib/utils/go-to-page.ts index edd3a2ab..7670ff80 100644 --- a/kayle/lib/utils/go-to-page.ts +++ b/kayle/lib/utils/go-to-page.ts @@ -1,4 +1,5 @@ import { blockedResourceTypes, skippedResources } from "./resource-ignore"; +import { adEngine } from "./adblock"; import type { RunnerConfig } from "../config"; type Request = { @@ -11,6 +12,7 @@ type NetworkResource = { resourceType: string; request: Request; url: string; + domain?: string; }; /** @@ -20,7 +22,7 @@ type NetworkResource = { * @returns {Promise} Returns a promise void. */ const blocknet = async ( - { resourceType, request, url }: NetworkResource, + { resourceType, request, url, domain }: NetworkResource, allowImage?: boolean ) => { // ignore intercepted request @@ -51,6 +53,16 @@ const blocknet = async ( } } + // engine loads asynchronously (null when disabled); debug=false => boolean match + const adblocker = await adEngine; + if ( + adblocker && + (url.startsWith("https://") || url.startsWith("http://")) && + adblocker.check(url, new URL(url).origin, resourceType, false) + ) { + return await request.abort(); + } + return await request.continue(); }; diff --git a/kayle/package.json b/kayle/package.json index f209e23b..c83cde91 100644 --- a/kayle/package.json +++ b/kayle/package.json @@ -1,6 +1,6 @@ { "name": "kayle", - "version": "0.4.20", + "version": "0.4.22", "description": "Extremely fast accessibility evaluation for nodejs", "main": "./build/index.js", "keywords": [ @@ -17,7 +17,7 @@ "scripts": { "prepare": "tsc", "build": "tsc", - "compile:test": "tsc && tsc --project tsconfig.test.json", + "compile:test": "yarn build && tsc --project tsconfig.test.json", "lint": "eslint .", "fix": "prettier --write '**/*.{js,jsx,ts,tsx}'", "bench:playwright:htmlcs": "node _tests/bench/fast_htmlcs-playwright.js",