Skip to content

Commit

Permalink
feat: findGroup method added
Browse files Browse the repository at this point in the history
  • Loading branch information
muratgozel committed Sep 14, 2024
1 parent ee4275a commit 6bc2541
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 34 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ allBots.disallow('/')

const googleBot = robotstxt.newGroup('googlebot')
googleBot.allow('/abc')
googleBot.disallow('/def')
googleBot.disallow('/def').disallow('/jkl')

// specify multiple bots
const otherBots = robotstxt.newGroup(['abot', 'bbot', 'cbot'])
Expand Down Expand Up @@ -76,6 +76,9 @@ Sitemap: https://yoursite/sitemap.tr.xml
`
const robotstxt = parseRobotsTxt(data)

// update something in some group
robotstxt.findGroup('barbot').allow('/aaa').allow('/bbb')

// store as json or do whatever you want
const json = robotstxt.json()
```
Expand Down
24 changes: 23 additions & 1 deletion src/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { test, expect } from "vitest";
import { parseRobotsTxt } from "./index";
import { parseRobotsTxt, RobotsTxt } from "./index";

test("invalid robots.txt", () => {
const instance = parseRobotsTxt("");
Expand Down Expand Up @@ -147,3 +147,25 @@ test("robots.txt with additional keys", () => {
expect(instance.additions).toContainEqual(["Sitemap", "abc"]);
expect(instance.additions).toContainEqual(["Sitemap", "def"]);
});

test("new group when group already exists", () => {
  // Requesting "bbot" twice should land both paths in the same group,
  // which is expected at index 1 (right after the "abot" group).
  const robotstxt = new RobotsTxt();
  robotstxt.newGroup("abot").allow("/a");
  for (const path of ["/b", "/c"]) {
    robotstxt.newGroup("bbot").allow(path);
  }

  const secondGroup = robotstxt.groups[1]!;
  expect(secondGroup.allows).toContainEqual("/b");
  expect(secondGroup.allows).toContainEqual("/c");
});

test("find group", () => {
  const robotstxt = new RobotsTxt();
  robotstxt.newGroup("abot").allow("/a");
  robotstxt.newGroup("bbot").allow("/b");

  // A user agent that was never registered yields undefined.
  const missing = robotstxt.findGroup("cbot");
  expect(missing).toBe(undefined);

  // A registered user agent yields its group with the recorded rules.
  const found = robotstxt.findGroup("bbot");
  const expectedShape = { ua: ["bbot"], allows: ["/b"] };
  expect(found).toMatchObject(expectedShape);
});
95 changes: 63 additions & 32 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,34 @@ export class RobotsTxt {
groups: RobotsTxtGroup[] = [];
additions: [string, string][] = [];

newGroup(userAgents: string | string[]) {
const self = this;
const group = {
ua: typeof userAgents === "string" ? [userAgents] : userAgents,
allows: [],
disallows: [],
customRules: {},
};
const index = self.groups.push(group) - 1;
/**
 * Creates a group for the given user agent(s) and appends it to `groups`,
 * without checking whether a matching group already exists.
 *
 * @param userAgents A single user agent, or a list of user agents. A list is
 *   handed to the group as-is (not copied).
 * @returns The newly created group.
 */
_newGroup(userAgents: string | string[]) {
  const agents = Array.isArray(userAgents) ? userAgents : [userAgents];
  const created = new RobotsTxtGroup(agents);
  this.groups.push(created);
  return created;
}

return {
addUserAgent(userAgent: string) {
self.groups[index]!.ua.push(userAgent);
},
allow(path: string) {
self.groups[index]!.allows.push(path);
},
disallow(path: string) {
self.groups[index]!.disallows.push(path);
},
addCustomRule(key: string, value: string) {
self.groups[index]!.customRules[key] = value;
},
};
/**
 * Returns the group for a user agent, creating it when none exists yet.
 *
 * @param userAgent A single user agent, or a list of user agents.
 *   - string: the first group whose `ua` list contains it is returned;
 *     if there is none, a new group is created.
 *   - string[]: a new group holding all of the listed agents is always
 *     created (no lookup is performed).
 * @returns The existing or newly created group.
 */
newGroup(userAgent: string | string[]) {
  if (typeof userAgent === "string") {
    // Single scan: `find` yields the first matching group or undefined.
    const existing = this.groups.find(({ ua }) => ua.includes(userAgent));
    if (existing !== undefined) {
      return existing;
    }
  }
  return this._newGroup(userAgent);
}

/**
 * Looks up the first group whose `ua` list contains the given user agent.
 *
 * @param userAgent The user agent to search for.
 * @returns The matching group, or `undefined` when no group lists it.
 */
findGroup(userAgent: string) {
  const listsAgent = ({ ua }: RobotsTxtGroup) => ua.includes(userAgent);
  return this.groups.find(listsAgent);
}

add(key: string, value: string) {
Expand Down Expand Up @@ -63,13 +67,47 @@ export class RobotsTxt {
}
}

/**
 * A single group of a robots.txt file: the user agents it applies to plus
 * the allow paths, disallow paths and custom key/value rules recorded for
 * them.
 *
 * Every mutator returns the group itself so calls can be chained, e.g.
 * `group.allow("/a").disallow("/b")`.
 */
export class RobotsTxtGroup {
  ua: string[];
  allows: string[];
  disallows: string[];
  customRules: Record<string, string>;

  /**
   * @param ua User agents this group applies to.
   * @param allows Initial allowed paths (defaults to an empty list).
   * @param disallows Initial disallowed paths (defaults to an empty list).
   * @param customRules Initial custom rules (defaults to an empty object).
   */
  constructor(
    ua: string[],
    allows: string[] = [],
    disallows: string[] = [],
    customRules: Record<string, string> = {},
  ) {
    this.ua = ua;
    this.allows = allows;
    this.disallows = disallows;
    this.customRules = customRules;
  }

  /**
   * Appends a user agent to this group.
   * @returns This group, for chaining.
   */
  addUserAgent(userAgent: string): this {
    this.ua.push(userAgent);
    return this;
  }

  /**
   * Appends a path to the allow list.
   * @returns This group, for chaining.
   */
  allow(path: string): this {
    this.allows.push(path);
    return this;
  }

  /**
   * Appends a path to the disallow list.
   * @returns This group, for chaining.
   */
  disallow(path: string): this {
    this.disallows.push(path);
    return this;
  }

  /**
   * Sets a custom rule; an existing rule with the same key is overwritten.
   * @returns This group, for chaining.
   */
  addCustomRule(key: string, value: string): this {
    this.customRules[key] = value;
    return this;
  }
}

export function parseRobotsTxt(data: string) {
const robotstxt = new RobotsTxt();
const lines = data.split(/\r?\n/).map((line) => trimLine(line));

let didContentStart = false;
let group: ReturnType<InstanceType<typeof RobotsTxt>["newGroup"]> | null =
null;
let group: InstanceType<typeof RobotsTxtGroup> | null = null;
for (const line of lines) {
if (canSkipLine(line)) continue;

Expand Down Expand Up @@ -152,10 +190,3 @@ export function parseRobotsTxt(data: string) {
return line.replace(/^[\t\s]+/, "").replace(/[\t\s]+$/, "");
}
}

/** Plain-object shape of a single robots.txt group. */
export type RobotsTxtGroup = {
// User agents the group applies to.
ua: string[];
// Paths recorded through allow().
allows: string[];
// Paths recorded through disallow().
disallows: string[];
// Custom rules, stored as key -> value.
customRules: Record<string, string>;
};

0 comments on commit 6bc2541

Please sign in to comment.