diff --git a/README.md b/README.md
index 1cd46a932..020544d48 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,3 @@
 ## Disclaimer
 + This project is only for learning web-crawling techniques. Do not abuse it, and do not use this tool for anything illegal or harmful to national interests.
 + Any profit-making use of this project is forbidden; the author accepts no responsibility for the consequences of any illegal use.
-
-## Credits
-+ [Subconverter](https://github.com/asdlokj1qpi233/subconverter)
-+ [Mihomo](https://github.com/MetaCubeX/mihomo/tree/Meta)
diff --git a/clash/clash-darwin-amd b/clash/clash-darwin-amd
index fb554740c..bad6828d9 100644
Binary files a/clash/clash-darwin-amd and b/clash/clash-darwin-amd differ
diff --git a/clash/clash-darwin-arm b/clash/clash-darwin-arm
index 78beffae9..9d8e391d7 100644
Binary files a/clash/clash-darwin-arm and b/clash/clash-darwin-arm differ
diff --git a/clash/clash-linux-amd b/clash/clash-linux-amd
index cf641f63a..83de649e6 100644
Binary files a/clash/clash-linux-amd and b/clash/clash-linux-amd differ
diff --git a/clash/clash-linux-arm b/clash/clash-linux-arm
index 2a899bd52..94a604c5f 100644
Binary files a/clash/clash-linux-arm and b/clash/clash-linux-arm differ
diff --git a/clash/clash-windows-amd.exe b/clash/clash-windows-amd.exe
index 6000cf166..93ec7bb47 100644
Binary files a/clash/clash-windows-amd.exe and b/clash/clash-windows-amd.exe differ
diff --git a/requirements.txt b/requirements.txt
index 36f7c56c6..2f361145f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 PyYAML
 tqdm
-geoip2
\ No newline at end of file
+geoip2
+pycryptodomex
+fofa-hack
\ No newline at end of file
diff --git a/subconverter/pref.toml b/subconverter/pref.toml
index 2a982bc50..9a3a828c3 100644
--- a/subconverter/pref.toml
+++ b/subconverter/pref.toml
@@ -17,7 +17,7 @@
 insert_url = [""]
 prepend_insert_url = true
 
 # Exclude nodes which remarks match the following patterns. Supports regular expression.
-exclude_remarks = ["(?i)(到期|流量|Expire|Traffic|时间|官网|引导页?|网(\\s+)?址|官址|地址|导航|平台|网站|域名|付费|优惠|折扣|刷新|获取|订阅|群|取消|禁|产品|余额|更新|回国|telegram|t.me|频道|电报|售后|反馈|工单|私聊|维护|升级|邮箱|关闭|耗尽|关机|停机|故障|宕机|调整|修复|解决|重新|拥挤|测试|公测|过年|test|测速|https?://|重置|剩余|特殊|⭕|1️⃣|购买|暂时|临时|下载|调试|检查|干扰|热度|公告|官方|推迟|阻断|采购|好用|福利|精品|商用|Prepaid|疫情|感染|下架|投诉|屏蔽|邀请|欢迎|机场|返利|推广|佣金|广告|破解|不同|店|YYDS|真香|关注|谢谢|大家|永久|浏览器|月付|打开|包月|套餐|以上|以下|通知|注册|活动|转换|保证|每天|分享|倒卖|搬运|苏小柠|王者荣耀|代练|去除|不合适|尽快|绑定|临时域名|禁止|登录|激活|账号|恢复|更换|搜索|失联|发布|失联|发布)"]
+exclude_remarks = ["(?i)(到期|流量|Expire|Traffic|时间|官网|引导页?|网(\\s+)?址|官址|地址|导航|平台|网站|域名|付费|优惠|折扣|刷新|获取|订阅|群|取消|禁|产品|余额|更新|回国|telegram|t.me|频道|电报|售后|反馈|工单|私聊|维护|升级|邮箱|关闭|耗尽|关机|停机|故障|宕机|调整|修复|解决|重新|拥挤|测试|公测|过年|test|测速|https?://|重置|剩余|特殊|⭕|1️⃣|购买|暂时|临时|下载|调试|检查|干扰|热度|公告|官方|推迟|阻断|采购|好用|福利|精品|商用|Prepaid|疫情|感染|下架|投诉|屏蔽|邀请|欢迎|机场|返利|推广|佣金|广告|破解|不同|骗|店|YYDS|真香|关注|谢谢|大家|永久|浏览器|月付|打开|包月|套餐|以上|以下|通知|注册|活动|转换|保证|每天|分享|倒卖|搬运|苏小柠|王者荣耀|代练|去除|不合适|尽快|绑定|临时域名|禁止|登录|激活|账号|恢复|更换|搜索|失联|发布|失联|发布)"]
 
 # Only include nodes which remarks match the following patterns. Supports regular expression.
 #include_remarks = ["V3.*港"]
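The pref.toml hunk above only inserts 骗 (scam) into the `exclude_remarks` alternation. For context, here is a minimal sketch of how such a remark filter behaves, assuming subconverter applies the pattern as an ordinary case-insensitive regex search; the `is_excluded` helper and the shortened pattern are illustrative, not part of the repo:

```python
import re

# The (?i) prefix in the config makes matching case-insensitive; the
# alternation is shortened here to a few entries for readability.
EXCLUDE_REMARKS = r"(?i)(到期|流量|Expire|Traffic|骗|店)"

def is_excluded(remark: str) -> bool:
    """Return True if a node's remark matches the exclusion pattern."""
    return re.search(EXCLUDE_REMARKS, remark) is not None

assert is_excluded("Expire: 2024-12-31")   # matches "Expire"
assert is_excluded("谨防被骗")              # matches the newly added "骗"
assert not is_excluded("HK-01 IEPL x0.5")  # a normal node remark is kept
```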
diff --git a/subconverter/snippets/emoji.toml b/subconverter/snippets/emoji.toml
index da9ed3f83..171cdcea7 100644
--- a/subconverter/snippets/emoji.toml
+++ b/subconverter/snippets/emoji.toml
@@ -23,7 +23,7 @@ match = "(?i:\\bJP[N]?\\b|Japan|Tokyo|Osaka|Saitama|日本|东京|大阪|埼玉|
 emoji = "🇯🇵"
 
 [[emoji]]
-match = "(?i:\\bK[O]?R\\b|Korea|首尔|韩|韓)"
+match = "(?i:\\bK[O]?R\\b|Korea|首尔|春川|韩|韓)"
 emoji = "🇰🇷"
 
 [[emoji]]
@@ -51,7 +51,7 @@ match = "(Austria|Vienna|奥地利|维也纳)"
 emoji = "🇦🇹"
 
 [[emoji]]
-match = "(?i:\\bAU[S]?\\b|Australia|Sydney|澳大利亚|澳洲|悉尼)"
+match = "(?i:\\bAU[S]?\\b|Australia|Sydney|澳大利亚|澳洲|悉尼|墨尔本)"
 emoji = "🇦🇺"
 
 [[emoji]]
diff --git a/subconverter/snippets/emoji.txt b/subconverter/snippets/emoji.txt
index 4c26fb908..22a3c4813 100644
--- a/subconverter/snippets/emoji.txt
+++ b/subconverter/snippets/emoji.txt
@@ -4,14 +4,14 @@
 (?i:\bSG[P]?\b|Singapore|新加坡|狮城|[^-]新),🇸🇬
 (尼日利亚|Nigeria),🇳🇬
 (?i:\bJP[N]?\b|Japan|Tokyo|Osaka|Saitama|日本|东京|大阪|埼玉|[^-]日),🇯🇵
-(?i:\bK[O]?R\b|Korea|首尔|韩|韓),🇰🇷
+(?i:\bK[O]?R\b|Korea|首尔|春川|韩|韓),🇰🇷
 (?i:\bUS[A]?\b|America|United.*?States|美国|[^-]美|美西|美东|波特兰|达拉斯|俄勒冈|凤凰城|费利蒙|硅谷|拉斯维加斯|洛杉矶|圣何塞|圣克拉拉|西雅图|芝加哥|纽约|旧金山),🇺🇸
 (Ascension|阿森松),🇦🇨
 (?i:\bUAE\b|Dubai|阿联酋|迪拜),🇦🇪
 (阿尔巴尼亚|Albania),🇦🇱
 (Argentina|阿根廷),🇦🇷
 (Austria|Vienna|奥地利|维也纳),🇦🇹
-(?i:\bAU[S]?\b|Australia|Sydney|澳大利亚|澳洲|悉尼),🇦🇺
+(?i:\bAU[S]?\b|Australia|Sydney|澳大利亚|澳洲|悉尼|墨尔本),🇦🇺
 (阿塞拜疆|Azerbaijan),🇦🇿
 (波黑共和国|波士尼亚与赫塞哥维纳|Bosnia|Herzegovina),🇧🇦
 (Belgium|比利时),🇧🇪
diff --git a/subscribe/airport.py b/subscribe/airport.py
index 7dfd5fa22..6aa4f9753 100644
--- a/subscribe/airport.py
+++ b/subscribe/airport.py
@@ -454,7 +454,7 @@ def parse(
             return []
 
         if self.sub.startswith(utils.FILEPATH_PROTOCAL):
-            self.sub = self.sub[len(utils.FILEPATH_PROTOCAL) - 1 :]
+            self.sub = self.sub[len(utils.FILEPATH_PROTOCAL) :]
             if not os.path.exists(self.sub) or not os.path.isfile(self.sub):
                 logger.error(f"[ParseError] file: {self.sub} not found")
                 return []
@@ -505,6 +505,13 @@ def parse(
             if utils.isblank(name) or name in unused_nodes:
                 continue
 
+            # JustMySocks nodes: replace the IP address with the hostname from the remark
+            if re.match(r"^JMS-\d+@[a-zA-Z0-9.]+:\d+$", name, flags=re.I):
+                server = name.split("@", maxsplit=1)[1]
+                hostname = utils.trim(server.split(":", maxsplit=1)[0]).lower()
+                if re.match(r"^(\d+\.){3}\d+$", item.get("server", ""), flags=re.I):
+                    item["server"] = hostname
+
             try:
                 if self.include and not re.search(self.include, name, re.I):
                     continue
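The second airport.py hunk swaps a JustMySocks node's bare IPv4 `server` for the hostname embedded in its remark. A standalone sketch of that logic; `prefer_jms_hostname` and the sample node dict are hypothetical stand-ins for the in-loop code and `item`:

```python
import re

def prefer_jms_hostname(name: str, item: dict) -> dict:
    """If the remark looks like 'JMS-xxx@host:port' and the server field is a
    bare IPv4 address, replace it with the hostname from the remark."""
    if re.match(r"^JMS-\d+@[a-zA-Z0-9.]+:\d+$", name, flags=re.I):
        server = name.split("@", maxsplit=1)[1]
        hostname = server.split(":", maxsplit=1)[0].strip().lower()
        if re.match(r"^(\d+\.){3}\d+$", item.get("server", "")):
            item["server"] = hostname
    return item

node = {"server": "104.19.22.7", "port": 443}
prefer_jms_hostname("JMS-123456@c1s1.jamjams.net:443", node)
assert node["server"] == "c1s1.jamjams.net"  # hostname survives IP rotation
```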
diff --git a/subscribe/crawl.py b/subscribe/crawl.py
index 95e892129..49f5f52a8 100644
--- a/subscribe/crawl.py
+++ b/subscribe/crawl.py
@@ -329,7 +329,7 @@ def batch_crawl(conf: dict, num_threads: int = 50, display: bool = True) -> list
 
     if len(unknowns) > 0:
         unknowns = [utils.mask(url=x) for x in unknowns]
-        logger.warn(
+        logger.warning(
             f"[CrawlWarn] some links were found, but could not be confirmed to work, subscriptions: {unknowns}"
         )
 
@@ -1081,7 +1081,7 @@ def extract_subscribes(
         return {}
     try:
         limits, collections, proxies = max(1, limits), {}, []
-        sub_regex = r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))"
+        sub_regex = r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))|https://jmssub\.net/members/getsub\.php\?service=\d+&id=[a-zA-Z0-9\-]{36}(?:\S+)?"
         extra_regex = r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+/sub\?(?:\S+)?target=\S+"
         protocal_regex = r"(?:vmess|trojan|ss|ssr|snell|hysteria2|vless|hysteria)://[a-zA-Z0-9:.?+=@%&#_\-/]{10,}"
 
@@ -1212,7 +1212,8 @@ def validate(
     if not params.pop("saved", False):
         if reachable or (discovered and defeat <= threshold and not expired):
             # don't store temporary links shared by someone
-            if not workflow.standard_sub(url=url) and mode != 1:
+            pardon = params.pop("pardon", False)
+            if not pardon and not workflow.standard_sub(url=url) and mode != 1:
                 return result
 
             remark(source=params, defeat=defeat, discovered=True)
diff --git a/subscribe/origin.py b/subscribe/origin.py
index 69c1faceb..795464933 100644
--- a/subscribe/origin.py
+++ b/subscribe/origin.py
@@ -23,6 +23,7 @@ class Origin(enum.Enum):
     GOOGLE = ExpireInfo(name="GOOGLE", expire=10)
     YANDEX = ExpireInfo(name="YANDEX", expire=10)
    GITHUB = ExpireInfo(name="GITHUB", expire=20)
+    FOFA = ExpireInfo(name="FOFA", expire=20)
     V2RAYSE = ExpireInfo(name="V2RAYSE", expire=45)
     REPO = ExpireInfo(name="REPO", expire=60)
     REMAIND = ExpireInfo(name="REMAIND", expire=maxsize)
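The widened `sub_regex` in crawl.py above now also captures JustMySocks subscription links (jmssub.net/members/getsub.php). A quick sketch of the extraction, with the same pattern split across lines for readability; the sample links are invented:

```python
import re

# Same alternation as the patched sub_regex: airport-panel subscription URLs
# plus the new JustMySocks getsub.php style.
SUB_REGEX = (
    r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+"
    r"(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})"
    r"|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))"
    r"|https://jmssub\.net/members/getsub\.php\?service=\d+&id=[a-zA-Z0-9\-]{36}(?:\S+)?"
)

text = """
found https://example.com/api/v1/client/subscribe?token=0123456789abcdef in a page,
and https://jmssub.net/members/getsub.php?service=123&id=01234567-89ab-cdef-0123-456789abcdef too
"""
# all groups are non-capturing, so findall returns the full matched URLs
links = re.findall(SUB_REGEX, text, flags=re.I)
print(links)  # both subscription styles are matched
```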
diff --git a/subscribe/scripts/fofa.py b/subscribe/scripts/fofa.py
new file mode 100644
index 000000000..009bb91c2
--- /dev/null
+++ b/subscribe/scripts/fofa.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# @Author : wzdnzd
+# @Time : 2024-09-08
+
+import gzip
+import itertools
+import re
+import socket
+import ssl
+import sys
+import time
+import urllib
+import urllib.error
+import urllib.request
+from copy import deepcopy
+
+import utils
+import yaml
+from crawl import naming_task
+from logger import logger
+from origin import Origin
+from urlvalidator import isurl
+
+
+def search(exclude: str = "", maxsize: int = sys.maxsize, timesleep: float = 3, timeout: float = 180) -> list[str]:
+    try:
+        from fofa_hack import fofa as client
+    except ImportError:
+        logger.error(
+            "[FOFA] please make sure that the dependencies pycryptodomex and fofa-hack are installed correctly"
+        )
+        return []
+
+    exclude = utils.trim(exclude)
+    maxsize = max(maxsize, 10)
+    timesleep = max(timesleep, 0)
+    timeout = max(timeout, 0)
+    items = set()
+
+    generator = client.api(
+        search_key='body="port: 7890" && body="socks-port: 7891" && body="allow-lan: true"',
+        endcount=maxsize,
+        timesleep=timesleep,
+        timeout=timeout,
+    )
+
+    for data in generator:
+        if not data:
+            break
+
+        for site in data:
+            url = utils.trim(site)
+            try:
+                if url and (not exclude or not re.search(exclude, url, flags=re.I)):
+                    items.add(url)
+            except:
+                logger.error(f"[FOFA] invalid pattern: {exclude}")
+
+    return list(items)
+
+
+def extract_one(url: str) -> list[str]:
+    url = utils.trim(url)
+    if not isurl(url=url):
+        return []
+
+    regex = r"(?:https?://)?(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))"
+
+    headers = {"User-Agent": "Clash.Meta; Mihomo"}
+    subscriptions, content = [], ""
+    count, retry = 0, 2
+
+    while not content and count < retry:
+        count += 1
+
+        try:
+            request = urllib.request.Request(url=url, headers=headers, method="GET")
+            response = urllib.request.urlopen(request, timeout=10, context=utils.CTX)
+
+            if re.search(regex, response.geturl(), flags=re.I):
+                subscriptions.append(response.geturl())
+
+            content = response.read()
+            try:
+                content = str(content, encoding="utf8")
+            except:
+                content = gzip.decompress(content).decode("utf8")
+        except urllib.error.URLError as e:
+            if not isinstance(e.reason, (socket.gaierror, ssl.SSLError, socket.timeout)):
+                break
+        except Exception:
+            pass
+
+    if content:
+        groups = re.findall(regex, content, flags=re.I)
+        if groups:
+            subscriptions.extend(list(set([utils.url_complete(x) for x in groups if x])))
+
+        # extract from proxy-providers
+        providers, key = None, "proxy-providers"
+        try:
+            providers = yaml.load(content, Loader=yaml.SafeLoader).get(key, [])
+        except yaml.constructor.ConstructorError:
+            yaml.add_multi_constructor("str", lambda loader, suffix, node: str(node.value), Loader=yaml.SafeLoader)
+            providers = yaml.load(content, Loader=yaml.FullLoader).get(key, [])
+        except Exception:
+            pass
+
+        if providers and isinstance(providers, dict):
+            for _, v in providers.items():
+                if not v or not isinstance(v, dict) or v.get("type", "") != "http":
+                    continue
+
+                link = utils.trim(v.get("url", ""))
+                if link and (link.startswith("https://") or link.startswith("http://")):
+                    subscriptions.append(link)
+
+    return subscriptions
+
+
+def recall(params: dict) -> list:
+    def inwrap(sub: str, nocache: bool = True, pardon: bool = False) -> dict:
+        config = deepcopy(params.get("config", {}))
+        config["sub"] = sub
+        config["saved"] = False
+        config["checked"] = False
+        config["nocache"] = nocache
+        config["pardon"] = pardon
+        config["name"] = naming_task(sub)
+        config["origin"] = Origin.FOFA.name
+        config["push_to"] = list(set(config.get("push_to", [])))
+
+        return config
+
+    if not params or not isinstance(params, dict):
+        return []
+
+    exclude = params.get("exclude", "")
+    check = params.get("check", True)
+    maxsize = int(params.get("maxsize", sys.maxsize))
+    timesleep = float(params.get("timesleep", 3))
+    timeout = float(params.get("timeout", 180))
+
+    starttime = time.time()
+    links = search(exclude=exclude, maxsize=maxsize, timesleep=timesleep, timeout=timeout)
+    if not links:
+        logger.error(f"[FOFA] cannot find any valid public subscription, cost: {time.time()-starttime:.2f}s")
+        return []
+
+    tasks = list()
+    for link in links:
+        tasks.append(inwrap(sub=link, nocache=True, pardon=False))
+
+    if check:
+        logger.info(f"[FOFA] start to extract subscriptions from links, count: {len(links)}")
+
+        results = utils.multi_thread_run(func=extract_one, tasks=links)
+        subscriptions = [x for x in set(itertools.chain.from_iterable(results)) if x]
+
+        for link in subscriptions:
+            tasks.append(inwrap(sub=link, nocache=False, pardon=True))
+
+        logger.info(f"[FOFA] found {len(subscriptions)} subscriptions: {subscriptions}")
+
+    cost = "{:.2f}s".format(time.time() - starttime)
+    logger.info(f"[FOFA] search finished, found {len(tasks)} candidates to be checked, cost: {cost}")
+
+    return tasks
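Besides direct subscription links, `extract_one` in fofa.py also mines remote `proxy-providers` out of any Clash config it fetches. A condensed sketch of that YAML walk, assuming the config parses with a plain `yaml.safe_load`; the inline sample stands in for downloaded content:

```python
import yaml

SAMPLE = """
proxy-providers:
  provider1:
    type: http
    url: https://example.org/sub?token=abc
    interval: 3600
  local:
    type: file
    path: ./profiles/nodes.yaml
"""

providers = yaml.safe_load(SAMPLE).get("proxy-providers", {})
subscriptions = []
for value in providers.values():
    # only remote providers (type: http) carry a fetchable subscription URL
    if isinstance(value, dict) and value.get("type") == "http":
        link = str(value.get("url", "")).strip()
        if link.startswith(("http://", "https://")):
            subscriptions.append(link)

print(subscriptions)  # ['https://example.org/sub?token=abc']
```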
diff --git a/subscribe/scripts/tempairport.py b/subscribe/scripts/tempairport.py
index 9cd04c9d0..a16baf3fc 100644
--- a/subscribe/scripts/tempairport.py
+++ b/subscribe/scripts/tempairport.py
@@ -57,7 +57,7 @@ def fetchsub(params: dict) -> list:
     exists, unregisters, unknowns, data = load(engine=engine, persist=persist, retry=params.get("retry", True))
 
     if not exists and not unregisters and unknowns:
-        logger.warn(f"[TempSubError] skip fetchsub because cannot get any valid config")
+        logger.warning("[TempSubError] skip fetchsub because no valid config can be found")
         return []
 
     if unregisters:
@@ -75,7 +75,7 @@ def fetchsub(params: dict) -> list:
             f"[TempSubInfo] cannot get subscription because domain=[{airport.ref}] forces validation or requires payment"
         )
         if not utils.isblank(airport.sub):
-            logger.warn(
+            logger.warning(
                 f"[TempSubInfo] renew error, domain: {airport.ref} username: {airport.username} password: {airport.password} sub: {airport.sub}"
             )
 
diff --git a/subscribe/scripts/v2rayfree.py b/subscribe/scripts/v2rayfree.py
index 8ce75aaf0..fa28ac605 100644
--- a/subscribe/scripts/v2rayfree.py
+++ b/subscribe/scripts/v2rayfree.py
@@ -51,7 +51,7 @@ def fetch(email: str, retry: int = 2) -> str:
     fake_email = email[: index // 2] + "***" + email[index:]
 
     if "已封禁" in content:
-        logger.warn(f"[GetRSSError] {content}, email=[{fake_email}]")
+        logger.warning(f"[GetRSSError] {content}, email=[{fake_email}]")
         return ""
 
     regex = "https://f\.kxyz\.eu\.org/f\.php\?r=([A-Za-z0-9/=]+)"
diff --git a/subscribe/scripts/v2rayse.py b/subscribe/scripts/v2rayse.py
index 37e2e21fe..b49616580 100644
--- a/subscribe/scripts/v2rayse.py
+++ b/subscribe/scripts/v2rayse.py
@@ -115,6 +115,7 @@ def list_files(base: str, date: str, maxsize: int, last: datetime) -> list[str]:
     prefix, files = f"{base}?prefix={date}/", []
 
     while truncated and count < 3:
+        count += 1
         url = prefix if not marker else f"{prefix}&marker={marker}"
         try:
             content = utils.http_get(url=url)
@@ -154,7 +155,6 @@ def list_files(base: str, date: str, maxsize: int, last: datetime) -> list[str]:
                 files.append(f"{base}/{name}")
         except:
-            count += 1
             logger.error(f"[V2RaySE] list files error, date: {date}, marker: {marker}")
 
     return files
 
@@ -176,14 +176,26 @@ def fetchone(
     proxies, subscriptions = [], []
 
     if not utils.isb64encode(content=content):
-        regex = r"(?:https?://)?(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))"
+        regex = r"(?:https?://)?(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))|https://jmssub\.net/members/getsub\.php\?service=\d+&id=[a-zA-Z0-9\-]{36}(?:\S+)?"
         groups = re.findall(regex, content, flags=re.I)
         if groups:
             subscriptions = list(set([utils.url_complete(x) for x in groups if x]))
 
     if not noproxies:
         try:
-            proxies = AirPort.decode(text=content, program=subconverter, special=SPECIAL_PROTOCOLS, throw=True)
+            index = url.rfind("/")
+            if index != -1:
+                name = url[index + 1 :]
+            else:
+                name = utils.random_chars(length=6, punctuation=False)
+
+            proxies = AirPort.decode(
+                text=content,
+                program=subconverter,
+                artifact=name,
+                special=SPECIAL_PROTOCOLS,
+                throw=True,
+            )
 
             # detect if it contains shared proxy nodes
             if detect(
@@ -325,7 +337,7 @@ def fetch(params: dict) -> list:
     # clean workspace
     workflow.cleanup(datapath, filenames=[source, dest, "generate.ini"])
 
-    success = pushtool.push_to(content=content, push_conf=proxies_store, group="v2rayse")
+    success = pushtool.push_to(content=content or " ", push_conf=proxies_store, group="v2rayse")
     if not success:
         filename = os.path.join(os.path.dirname(datapath), "data", "v2rayse.txt")
         logger.error(f"[V2RaySE] failed to store {len(proxies)} proxies, will save them to local file {filename}")
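The list_files fix in v2rayse.py moves `count += 1` from the except branch to the top of the loop, so the marker-based pagination stays bounded even when every request succeeds but the listing remains truncated. The pattern in isolation; `list_all` and the stubbed `fetch_page` are hypothetical:

```python
def list_all(fetch_page, max_rounds: int = 3) -> list:
    """Marker-based pagination with a hard round cap, mirroring the fix:
    the counter advances on every iteration, not only on errors."""
    items, marker, truncated, count = [], "", True, 0
    while truncated and count < max_rounds:
        count += 1  # bounded regardless of success or failure
        try:
            page, marker, truncated = fetch_page(marker)
            items.extend(page)
        except Exception:
            # the real code logs the error and lets the loop retry
            pass
    return items

# stub that always reports more data; the cap stops it after 3 rounds
calls = []
def fetch_page(marker):
    calls.append(marker)
    return [f"item-{len(calls)}"], str(len(calls)), True

print(list_all(fetch_page))  # ['item-1', 'item-2', 'item-3']
```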