From da58b965f83578b65ba093b0ed3e4f038a136c3c Mon Sep 17 00:00:00 2001 From: xkww3n Date: Mon, 6 Nov 2023 00:23:16 +0800 Subject: [PATCH] Add type-checking for Rule object. --- Utils/geosite.py | 10 ++++---- Utils/rule.py | 59 +++++++++++++++++++++++++++++++----------------- generate.py | 40 ++++++++++++++++++-------------- 3 files changed, 66 insertions(+), 43 deletions(-) diff --git a/Utils/geosite.py b/Utils/geosite.py index 5ca8546..13903a7 100644 --- a/Utils/geosite.py +++ b/Utils/geosite.py @@ -13,17 +13,17 @@ def parse(src: set, excluded_imports=None, excluded_tags=None) -> rule.RuleSet: continue parsed_rule = rule.Rule() if "@" in line: - parsed_rule.Tag = line.split("@")[1] + parsed_rule.set_tag(line.split("@")[1]) if parsed_rule.Tag in excluded_tags: logging.debug(f'Line "{raw_line}" has a excluded tag "{parsed_rule.Tag}", skipped.') continue line = line.split(" @")[0] if ":" not in line: - parsed_rule.Type = "DomainSuffix" - parsed_rule.Payload = line + parsed_rule.set_type("DomainSuffix") + parsed_rule.set_payload(line) elif line.startswith("full:"): - parsed_rule.Type = "DomainFull" - parsed_rule.Payload = line.strip("full:") + parsed_rule.set_type("DomainFull") + parsed_rule.set_payload(line.strip("full:")) elif line.startswith("include:"): name_import = line.split("include:")[1] if name_import not in excluded_imports: diff --git a/Utils/rule.py b/Utils/rule.py index abb92d7..43f0bd0 100644 --- a/Utils/rule.py +++ b/Utils/rule.py @@ -1,21 +1,25 @@ -from copy import copy import logging +from copy import copy +from ipaddress import ip_network from pathlib import Path -from abp.filters.parser import Filter - from . import const class Rule: - Type: str + Type: str # DomainSuffix / DomainFull / IPCIDR / IPCIDR6 / Classic Payload: str Tag: str - def __init__(self, content_type: str = "", payload: str = "", tag: str = ""): - self.Type = content_type # DomainSuffix / DomainFull / IPCIDR / IPCIDR6 / Classic - self.Payload = payload - self.Tag = tag + def __init__(self, rule_type: str = "", payload: str = "", tag: str = ""): + if rule_type or payload: + self.set_type(rule_type) + self.set_payload(payload) + self.set_tag(tag) + else: + self.Type = "" + self.Payload = "" + self.Tag = tag def __str__(self): return f'Type: "{self.Type}", Payload: "{self.Payload}", Tag: {self.Tag if self.Tag else "NONE"}' @@ -26,6 +30,26 @@ def __hash__(self): def __eq__(self, other): return self.Type == other.Type and self.Payload == other.Payload + def set_type(self, rule_type: str): + allowed_type = ("DomainSuffix", "DomainFull", "IPCIDR", "IPCIDR6", "Classic") + if rule_type not in allowed_type: + raise TypeError(f"Unsupported type: {rule_type}") + self.Type = rule_type + + def set_payload(self, payload: str): + if "Domain" in self.Type: + if not is_domain(payload): + raise ValueError(f"Invalid domain: {payload}") + elif "IP" in self.Type: + try: + ip_network(payload) + except ValueError: + raise ValueError(f"Invalid IP address: {payload}") + self.Payload = payload + + def set_tag(self, tag: str = ""): + self.Tag = tag + def includes(self, other): if self.Type == "DomainSuffix": if self.Payload == other.Payload: @@ -112,21 +136,14 @@ def is_ipaddr(addr: str) -> bool: return True -def is_domain(rule: Filter) -> bool: +def is_domain(addr: str) -> bool: blacklist_include = ("/", "*", "=", "~", "?", "#", ",", ":", " ", "(", ")", "[", "]", "_") - if ( - rule.type == "filter" - and rule.selector["type"] == "url-pattern" - and "." in rule.text - and not any([bl_char in rule.text for bl_char in blacklist_include]) - and not rule.text.startswith("-") - and not rule.text.endswith(".") - and not rule.options - and not is_ipaddr(rule.text.strip("||").strip("^")) - ): - return True - else: + if (any([bl_char in addr for bl_char in blacklist_include]) + or addr.startswith("-") + or addr.endswith(".") + or is_ipaddr(addr)): return False + return True def dump(src: RuleSet, target: str, dst: Path, filename: str) -> None: diff --git a/generate.py b/generate.py index ea3191b..741f171 100644 --- a/generate.py +++ b/generate.py @@ -32,25 +32,31 @@ ruleset_exclusions_raw = rule.RuleSet("DOMAIN", []) for line in parse_filterlist(src_rejections): - if rule.is_domain(line) and line.action == "block" and not line.text.endswith("|"): - if line.text.startswith("."): - rule_reject = rule.Rule("DomainSuffix", line.text.strip(".").strip("^")) - ruleset_rejections.add(rule_reject) - logger.debug(f'Line "{line.text}" is added to reject set. {rule_reject}.') - else: - rule_reject = rule.Rule("DomainSuffix", line.text.strip("||").strip("^")) - ruleset_rejections.add(rule_reject) - logger.debug( - f'Line "{line.text}" is added to reject set. "{rule_reject}".' - ) - elif rule.is_domain(line) and line.action == "allow" and not line.text.endswith("|"): - src_exclusions.append(line.text) - logger.debug(f'Line "{line.text}" is added to exclude set.') + if not line.type == "filter": + continue + line_stripped = line.text.strip("|").strip("^").strip("@") + if line.selector["type"] == "url-pattern" and rule.is_domain(line_stripped): + if line.action == "block": + if line.text.startswith("."): + rule_reject = rule.Rule("DomainSuffix", line.text.strip(".").strip("^")) + ruleset_rejections.add(rule_reject) + logger.debug(f'Line "{line.text}" is added to reject set. {rule_reject}.') + else: + rule_reject = rule.Rule("DomainSuffix", line.text.strip("||").strip("^")) + ruleset_rejections.add(rule_reject) + logger.debug( + f'Line "{line.text}" is added to reject set. "{rule_reject}".' + ) + elif line.action == "allow": + src_exclusions.append(line.text) + logger.debug(f'Line "{line.text}" is added to exclude set.') for line in parse_filterlist(src_exclusions): - if rule.is_domain(line): - domain = line.text.strip("@").strip("^").strip("|") - rule_exclude = rule.Rule("DomainFull", domain) + if not line.type == "filter": + continue + line_stripped = line.text.strip("|").strip("^").strip("@") + if rule.is_domain(line_stripped): + rule_exclude = rule.Rule("DomainFull", line_stripped) ruleset_exclusions_raw.add(rule_exclude) logger.debug(f'Line "{line.text}" is added to raw exclude set. "{rule_exclude}".')