From 53cbe9f303af82f2e198e403c2f8188e045ae6e2 Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Thu, 12 Sep 2024 15:21:52 -0700 Subject: [PATCH] support API with remote downloads --- build.js | 7 ++- index.js | 103 ++++++++++++++++++++++++++++++++++++------ package-lock.json | 112 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 3 ++ verify.js | 7 +-- 5 files changed, 214 insertions(+), 18 deletions(-) diff --git a/build.js b/build.js index 964ad66..c687f2c 100644 --- a/build.js +++ b/build.js @@ -1,6 +1,9 @@ import fs from 'fs' import path from 'path' -import { readResources } from './index.js' +import AdblockResources from './index.js' -fs.writeFileSync(path.join(import.meta.dirname, 'dist', 'resources.json'), JSON.stringify(readResources())) +const resourceGetter = new AdblockResources() +const resources = await resourceGetter.resources() + +fs.writeFileSync(path.join(import.meta.dirname, 'dist', 'resources.json'), JSON.stringify(resources)) diff --git a/index.js b/index.js index 37bd2c2..6431a52 100644 --- a/index.js +++ b/index.js @@ -1,17 +1,94 @@ -import fs from 'fs' -import path from 'path' +import fs from 'node:fs/promises' +import path from 'node:path' +import { pipeline, Readable } from 'node:stream' +import { promisify } from 'node:util' +import zlib from 'node:zlib' -import metadata from './metadata.json' with { type: "json" } +import tar from 'tar-stream' -const readResources = (() => { - return metadata.map(item => ({ - name: item.name, - aliases: item.aliases, - kind: item.kind, - content: fs.readFileSync(path.join(import.meta.dirname, 'resources', item.resourcePath)).toString('base64') - })) -}) +const pipe = promisify(pipeline) -import listCatalog from './filter_lists/list_catalog.json' with { type: "json" } +// Allows fetching the adblock list catalog and/or resources library. +// +// These files can be assembled from the files within this NPM package, or from +// an upstream git archive URL, depending on the constructor used. +// +// Because of the remote download capability, this repo doesn't need to be +// consistently kept up-to-date when resources are updated. +// +// However, if the logic or output format has to be changed in this code, +// please be sure to bump the version used by downstream dependencies as well. +export default class AdblockResources { + // `repoRootUrl` can be `undefined` to read from files on this filesystem, + // or use a remote URL like 'https://github.com/brave/adblock-resources/archive/refs/heads/master.tar.gz' + constructor(repoTarGzUrl) { + this.repoTarGzUrl = repoTarGzUrl + } -export { listCatalog, readResources } + async listCatalog() { + if (this.repoTarGzUrl === undefined) { + return (await import('./filter_lists/list_catalog.json', { with: { type: "json" } })).default + } else { + let p + + for await (const entry of entriesFromTarGz(this.repoTarGzUrl)) { + const file = getRepoPath(entry) + if (file === '/filter_lists/list_catalog.json') { + p = await new Response(entry).json() + } else { + entry.resume() + } + } + + return await p + } + } + + async resources() { + let resourceFiles + + if (this.repoTarGzUrl === undefined) { + const resources = await fs.readdir(path.join(import.meta.dirname, 'resources')); + resourceFiles = Promise.all(resources.map(async (file) => { + const resourcePath = path.basename(file) + return [ resourcePath, await fs.readFile(path.join(import.meta.dirname, 'resources', resourcePath)) ] + })); + } else { + const resourceIterator = (async function* (url) { + for await (const entry of entriesFromTarGz(url)) { + const file = getRepoPath(entry) + if (file.startsWith('/resources/') && file !== '/resources/') { + yield [ path.basename(file), Buffer.from(await new Response(entry).arrayBuffer()) ] + } else { + entry.resume() + } + } + })(this.repoTarGzUrl) + resourceFiles = Array.fromAsync(resourceIterator) + } + + return (await resourceFiles).map(([name, buffer]) => { + return { + name, + aliases: [], + kind: { mime: 'application/javascript' }, + content: buffer.toString('base64') + } + }) + } +} + +async function* entriesFromTarGz(url) { + const gunzip = zlib.createGunzip() + const extract = tar.extract() + pipe(Readable.fromWeb((await fetch(url)).body), gunzip, extract) + + for await (const entry of extract) { + yield entry + } +} + +function getRepoPath(entry) { + // strip the top-level dir (something like 'adblock-resources-master') + return entry.header.name.substring(entry.header.name.indexOf('/')) +} diff --git a/package-lock.json b/package-lock.json index c27eda2..058f42f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,9 @@ "name": "adblock-resources", "version": "1.0.0", "license": "MPL-2.0", + "dependencies": { + "tar-stream": "^3.1.7" + }, "devDependencies": { "adblock-rs": "0.8.12" } @@ -22,6 +25,19 @@ "cargo-cp-artifact": "^0.1" } }, + "node_modules/b4a": { + "version": "1.6.6", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==", + "license": "Apache-2.0" + }, + "node_modules/bare-events": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz", + "integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==", + "license": "Apache-2.0", + "optional": true + }, "node_modules/cargo-cp-artifact": { "version": "0.1.9", "resolved": "https://registry.npmjs.org/cargo-cp-artifact/-/cargo-cp-artifact-0.1.9.tgz", @@ -31,6 +47,52 @@ "bin": { "cargo-cp-artifact": "bin/cargo-cp-artifact.js" } + }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "license": "MIT" + }, + "node_modules/queue-tick": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", + "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==", + "license": "MIT" + }, + "node_modules/streamx": { + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.20.1.tgz", + "integrity": "sha512-uTa0mU6WUC65iUvzKH4X9hEdvSW7rbPxPtwfWiLMSj3qTdQbAiUboZTxauKfpFuGIGa1C2BYijZ7wgdUXICJhA==", + "license": "MIT", + "dependencies": { + "fast-fifo": "^1.3.2", + "queue-tick": "^1.0.1", + "text-decoder": "^1.1.0" + }, + "optionalDependencies": { + "bare-events": "^2.2.0" + } + }, + "node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/text-decoder": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.0.tgz", + "integrity": "sha512-n1yg1mOj9DNpk3NeZOx7T6jchTbyJS3i3cucbNN6FcdPriMZx7NsgrGpWWdWZZGxD7ES1XB+3uoqHMgOKaN+fg==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } } }, "dependencies": { @@ -43,11 +105,61 @@ "cargo-cp-artifact": "^0.1" } }, + "b4a": { + "version": "1.6.6", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" + }, + "bare-events": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz", + "integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==", + "optional": true + }, "cargo-cp-artifact": { "version": "0.1.9", "resolved": "https://registry.npmjs.org/cargo-cp-artifact/-/cargo-cp-artifact-0.1.9.tgz", "integrity": "sha512-6F+UYzTaGB+awsTXg0uSJA1/b/B3DDJzpKVRu0UmyI7DmNeaAl2RFHuTGIN6fEgpadRxoXGb7gbC1xo4C3IdyA==", "dev": true + }, + "fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" + }, + "queue-tick": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", + "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==" + }, + "streamx": { + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.20.1.tgz", + "integrity": "sha512-uTa0mU6WUC65iUvzKH4X9hEdvSW7rbPxPtwfWiLMSj3qTdQbAiUboZTxauKfpFuGIGa1C2BYijZ7wgdUXICJhA==", + "requires": { + "bare-events": "^2.2.0", + "fast-fifo": "^1.3.2", + "queue-tick": "^1.0.1", + "text-decoder": "^1.1.0" + } + }, + "tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "requires": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "text-decoder": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.0.tgz", + "integrity": "sha512-n1yg1mOj9DNpk3NeZOx7T6jchTbyJS3i3cucbNN6FcdPriMZx7NsgrGpWWdWZZGxD7ES1XB+3uoqHMgOKaN+fg==", + "requires": { + "b4a": "^1.6.4" + } } } } diff --git a/package.json b/package.json index ebc4321..49fdf4f 100644 --- a/package.json +++ b/package.json @@ -21,5 +21,8 @@ "homepage": "https://github.com/brave/adblock-resources#readme", "devDependencies": { "adblock-rs": "0.8.12" + }, + "dependencies": { + "tar-stream": "^3.1.7" } } diff --git a/verify.js b/verify.js index 68d75ac..8aaf4c6 100644 --- a/verify.js +++ b/verify.js @@ -1,10 +1,13 @@ -import { readResources, listCatalog } from './index.js' +import AdblockResources from './index.js' import assert from 'node:assert' import crypto from 'crypto' import test from 'node:test' import { Engine, FilterFormat, FilterSet } from 'adblock-rs' +const getter = new AdblockResources() +const [resources, listCatalog] = await Promise.all([getter.resources(), getter.listCatalog()]) + const getIDFromBase64PublicKey = (key) => { const hash = crypto.createHash('sha256') const data = Buffer.from(key, 'base64') @@ -16,8 +19,6 @@ const getIDFromBase64PublicKey = (key) => { } test('resources are parsed OK by adblock-rust', t => { - const resources = readResources() - const filterSet = new FilterSet() const engine = new Engine(filterSet)