Skip to content

Commit

Permalink
New CIDR validation system
Browse files Browse the repository at this point in the history
  • Loading branch information
Kseen715 committed Aug 17, 2024
1 parent fbc0c1c commit 11cb16e
Show file tree
Hide file tree
Showing 23 changed files with 88 additions and 30 deletions.
1 change: 0 additions & 1 deletion db.csv
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,6 @@
"108.160.170.0/24","youtube.com","__KNOWN__"
"108.160.172.0/23","youtube.com","__KNOWN__"
"108.170.192.0/18","youtube.com","__KNOWN__"
"108.177.0.0/15","youtube.com","__KNOWN__"
"108.177.103.0/24","youtube.com","__KNOWN__"
"108.177.104.0/24","youtube.com","__KNOWN__"
"108.177.111.0/24","youtube.com","__KNOWN__"
Expand Down
2 changes: 0 additions & 2 deletions hashes/README.md.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/chatgpt.com.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/db.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/discord.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/facebook.com.csv.hash

This file was deleted.

Binary file removed hashes/instagram.com.csv.hash
Binary file not shown.
1 change: 0 additions & 1 deletion hashes/meta.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/microsoft.csv.hash

This file was deleted.

Binary file removed hashes/netflix.csv.hash
Binary file not shown.
Binary file removed hashes/nhentai.net.csv.hash
Binary file not shown.
Binary file removed hashes/rutracker.org.csv.hash
Binary file not shown.
1 change: 0 additions & 1 deletion hashes/x.com.csv.hash

This file was deleted.

Binary file removed hashes/xhamster.com.csv.hash
Binary file not shown.
Binary file removed hashes/xvideos.com.csv.hash
Binary file not shown.
2 changes: 0 additions & 2 deletions hashes/youtube.com.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion in/known/youtube.com.csv
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
108.160.170.0/24
108.160.172.0/23
108.170.192.0/18
108.177.0.0/15
108.177.103.0/24
108.177.104.0/24
108.177.111.0/24
Expand Down
3 changes: 0 additions & 3 deletions out/amnezia_vpn.json
Original file line number Diff line number Diff line change
Expand Up @@ -2366,9 +2366,6 @@
{
"hostname": "108.170.192.0/18"
},
{
"hostname": "108.177.0.0/15"
},
{
"hostname": "108.177.103.0/24"
},
Expand Down
1 change: 0 additions & 1 deletion out/route_add.bat
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,6 @@ route ADD 108.160.169.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.160.170.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.160.172.0 MASK 255.255.254.0 0.0.1.255
route ADD 108.170.192.0 MASK 255.255.192.0 0.0.63.255
route ADD 108.176.0.0 MASK 255.254.0.0 0.1.255.255
route ADD 108.177.103.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.177.104.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.177.111.0 MASK 255.255.255.0 0.0.0.255
Expand Down
2 changes: 1 addition & 1 deletion out/wireguard_incomplete.conf

Large diffs are not rendered by default.

69 changes: 65 additions & 4 deletions src/common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd
import json
import colorama
import datetime
import datetime, os, json
# from hashlib import sha256

import colorama


DB_FILE = 'db.csv'

Expand Down Expand Up @@ -183,6 +183,9 @@ def save_hash_binary(new_hash_bytes, new_hash_filename):
data (bytes): Binary data to hash
filename (str): Name of the file to save hash
"""
# if folder for hashes does not exist, create it
if not os.path.exists(os.path.dirname(new_hash_filename)):
os.makedirs(os.path.dirname(new_hash_filename))
with open(new_hash_filename, 'wb') as f:
f.write(new_hash_bytes)
log_info(f'Saved hash to {new_hash_filename}')
Expand Down Expand Up @@ -210,4 +213,62 @@ def check_hash_binary(new_hash_bytes, old_hash_filename):
Returns:
bool: Whether the hash matches
"""
return new_hash_bytes == read_file_binary(old_hash_filename)
return new_hash_bytes == read_file_binary(old_hash_filename)


def is_cidr_valid(cidr: str):
"""_summary_ Check if CIDR is valid
Args:
cidr (str): CIDR to check
Returns:
bool: Whether the CIDR is valid
"""
if '.' in cidr:
if '/' not in cidr:
return False
ip, mask = cidr.split('/')
mask = int(mask) # how much bits can be non-zero, from start
ip = [int(x) for x in ip.split('.')]

if len(ip) != 4:
return False
# check if all bits after mask are zero
_str_ip = ''.join(f'{x:08b}' for x in ip)
# drop first mask bits
str_ip = _str_ip[mask:]
# check if there are any non-zero bits
for bit in str_ip:
if bit != '0':
log_info(f'{str_ip}')
log_info(f'{_str_ip}')
log_info(f'{'1' * mask}' + '0' * (32 - mask))
return False
return True
elif ':' in cidr:
if '/' not in cidr:
return False
ip, mask = cidr.split('/')
mask = int(mask)
ip = ip.split(':')
# remove empty strings
ip = [x for x in ip if x]

if len(ip) != 8:
# expand with zeros
ip = [x if x else '0000' for x in ip]
ip = [int(x, 16) for x in ip]
_str_ip = ''.join(f'{x:016b}' for x in ip)
# drop first mask bits
str_ip = _str_ip[mask:]
# check if there are any non-zero bits
for bit in str_ip:
if bit != '0':
log_info(f'{str_ip}')
log_info(f'{_str_ip}')
log_info(f'{'1' * mask}' + '0' * (128 - mask))
return False
return True

return False
2 changes: 1 addition & 1 deletion src/make_wireguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""

# AllowedIPs = 195.201.201.32/32, 192.173.68.0/24, 54.144.0.0/12, 54.192.0.0/12

# TODO: the Android client can only handle ~256 peers, so the list needs to be compacted (e.g. via CIDR/route aggregation)
def make_wireguard():
log_info('make_wireguard: Starting')

Expand Down
28 changes: 21 additions & 7 deletions src/sort_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,17 @@ def sort_db():
data = read_csv(DB_FILE)
data = data.sort_values(by=['hostname', 'ipv4', 'comment'])
data = data.drop_duplicates(subset=['ipv4'])
# check if str contains /, if it does, it is CIDR, check if it valid cidr
# if not, remove it
for index, row in data.iterrows():
if '/' in row['ipv4']:
try:
if not is_cidr_valid(row['ipv4']):
log_warning(f"Invalid CIDR notation {row['ipv4']}")
data = data[~(data['ipv4'] == row['ipv4'])]
log_info(f'Dropped {row["ipv4"]} because it is invalid CIDR notation')
except ValueError as e:
log_error(f"Invalid CIDR notation {row['ipv4']}: {e}")
data = drop_duplicates(data)
write_csv(data, DB_FILE)
log_happy('Database sorted')
Expand All @@ -66,13 +77,8 @@ def drop_duplicates_in_known(data):
log_info("No non-CIDR data found")
else:
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
not_cidr_data['ipv4'] = not_cidr_data.iloc[:, 0].apply(lambda x: None if ipaddress.ip_address(x) in ip_network else x)
not_cidr_data = not_cidr_data.dropna(subset=[not_cidr_data.columns[0]])
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
if not is_cidr_valid(cidr):
log_warning(f"Invalid CIDR notation {cidr}")
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

Expand Down Expand Up @@ -109,6 +115,14 @@ def sort_known():
data = pd.DataFrame(data, columns=[data[0].split(',')[0]])
log_info(f"Read {len(data)} rows from {file}")

for index, row in data.iterrows():
if '/' in row.iloc[0]:
try:
ipaddress.ip_network(row.iloc[0], strict=False)
except ValueError as e:
log_warning(f"Invalid CIDR notation {row[0]}: {e}")
data = data[~(data[data.columns[0]] == row[0])]
log_info(f'Dropped {row[0]} because it is invalid CIDR notation')
data = drop_duplicates_in_known(data)
data = data.sort_values(by=[data.columns[0]])

Expand Down

0 comments on commit 11cb16e

Please sign in to comment.