Skip to content

Commit

Permalink
New CIDR validation system
Browse files Browse the repository at this point in the history
  • Loading branch information
Kseen715 committed Aug 17, 2024
1 parent fbc0c1c commit 11cb16e
Show file tree
Hide file tree
Showing 23 changed files with 88 additions and 30 deletions.
1 change: 0 additions & 1 deletion db.csv
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,6 @@
"108.160.170.0/24","youtube.com","__KNOWN__"
"108.160.172.0/23","youtube.com","__KNOWN__"
"108.170.192.0/18","youtube.com","__KNOWN__"
"108.177.0.0/15","youtube.com","__KNOWN__"
"108.177.103.0/24","youtube.com","__KNOWN__"
"108.177.104.0/24","youtube.com","__KNOWN__"
"108.177.111.0/24","youtube.com","__KNOWN__"
Expand Down
2 changes: 0 additions & 2 deletions hashes/README.md.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/chatgpt.com.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/db.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/discord.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/facebook.com.csv.hash

This file was deleted.

Binary file removed hashes/instagram.com.csv.hash
Binary file not shown.
1 change: 0 additions & 1 deletion hashes/meta.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion hashes/microsoft.csv.hash

This file was deleted.

Binary file removed hashes/netflix.csv.hash
Binary file not shown.
Binary file removed hashes/nhentai.net.csv.hash
Binary file not shown.
Binary file removed hashes/rutracker.org.csv.hash
Binary file not shown.
1 change: 0 additions & 1 deletion hashes/x.com.csv.hash

This file was deleted.

Binary file removed hashes/xhamster.com.csv.hash
Binary file not shown.
Binary file removed hashes/xvideos.com.csv.hash
Binary file not shown.
2 changes: 0 additions & 2 deletions hashes/youtube.com.csv.hash

This file was deleted.

1 change: 0 additions & 1 deletion in/known/youtube.com.csv
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
108.160.170.0/24
108.160.172.0/23
108.170.192.0/18
108.177.0.0/15
108.177.103.0/24
108.177.104.0/24
108.177.111.0/24
Expand Down
3 changes: 0 additions & 3 deletions out/amnezia_vpn.json
Original file line number Diff line number Diff line change
Expand Up @@ -2366,9 +2366,6 @@
{
"hostname": "108.170.192.0/18"
},
{
"hostname": "108.177.0.0/15"
},
{
"hostname": "108.177.103.0/24"
},
Expand Down
1 change: 0 additions & 1 deletion out/route_add.bat
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,6 @@ route ADD 108.160.169.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.160.170.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.160.172.0 MASK 255.255.254.0 0.0.1.255
route ADD 108.170.192.0 MASK 255.255.192.0 0.0.63.255
route ADD 108.176.0.0 MASK 255.254.0.0 0.1.255.255
route ADD 108.177.103.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.177.104.0 MASK 255.255.255.0 0.0.0.255
route ADD 108.177.111.0 MASK 255.255.255.0 0.0.0.255
Expand Down
2 changes: 1 addition & 1 deletion out/wireguard_incomplete.conf

Large diffs are not rendered by default.

69 changes: 65 additions & 4 deletions src/common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pandas as pd
import json
import colorama
import datetime
import datetime, os, json
# from hashlib import sha256

import colorama


DB_FILE = 'db.csv'

Expand Down Expand Up @@ -183,6 +183,9 @@ def save_hash_binary(new_hash_bytes, new_hash_filename):
data (bytes): Binary data to hash
filename (str): Name of the file to save hash
"""
# if folder for hashes does not exist, create it
if not os.path.exists(os.path.dirname(new_hash_filename)):
os.makedirs(os.path.dirname(new_hash_filename))
with open(new_hash_filename, 'wb') as f:
f.write(new_hash_bytes)
log_info(f'Saved hash to {new_hash_filename}')
Expand Down Expand Up @@ -210,4 +213,62 @@ def check_hash_binary(new_hash_bytes, old_hash_filename):
Returns:
bool: Whether the hash matches
"""
return new_hash_bytes == read_file_binary(old_hash_filename)
return new_hash_bytes == read_file_binary(old_hash_filename)


def is_cidr_valid(cidr: str):
"""_summary_ Check if CIDR is valid
Args:
cidr (str): CIDR to check
Returns:
bool: Whether the CIDR is valid
"""
if '.' in cidr:
if '/' not in cidr:
return False
ip, mask = cidr.split('/')
mask = int(mask) # how much bits can be non-zero, from start
ip = [int(x) for x in ip.split('.')]

if len(ip) != 4:
return False
# check if all bits after mask are zero
_str_ip = ''.join(f'{x:08b}' for x in ip)
# drop first mask bits
str_ip = _str_ip[mask:]
# check if there are any non-zero bits
for bit in str_ip:
if bit != '0':
log_info(f'{str_ip}')
log_info(f'{_str_ip}')
log_info(f'{'1' * mask}' + '0' * (32 - mask))
return False
return True
elif ':' in cidr:
if '/' not in cidr:
return False
ip, mask = cidr.split('/')
mask = int(mask)
ip = ip.split(':')
# remove empty strings
ip = [x for x in ip if x]

if len(ip) != 8:
# expand with zeros
ip = [x if x else '0000' for x in ip]
ip = [int(x, 16) for x in ip]
_str_ip = ''.join(f'{x:016b}' for x in ip)
# drop first mask bits
str_ip = _str_ip[mask:]
# check if there are any non-zero bits
for bit in str_ip:
if bit != '0':
log_info(f'{str_ip}')
log_info(f'{_str_ip}')
log_info(f'{'1' * mask}' + '0' * (128 - mask))
return False
return True

return False
2 changes: 1 addition & 1 deletion src/make_wireguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""

# AllowedIPs = 195.201.201.32/32, 192.173.68.0/24, 54.144.0.0/12, 54.192.0.0/12

# TODO: the Android client can only handle ~256 peers, so the list needs to be compacted (e.g. via CIDR/route aggregation)
def make_wireguard():
log_info('make_wireguard: Starting')

Expand Down
28 changes: 21 additions & 7 deletions src/sort_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,17 @@ def sort_db():
data = read_csv(DB_FILE)
data = data.sort_values(by=['hostname', 'ipv4', 'comment'])
data = data.drop_duplicates(subset=['ipv4'])
# check if str contains /, if it does, it is CIDR, check if it valid cidr
# if not, remove it
for index, row in data.iterrows():
if '/' in row['ipv4']:
try:
if not is_cidr_valid(row['ipv4']):
log_warning(f"Invalid CIDR notation {row['ipv4']}")
data = data[~(data['ipv4'] == row['ipv4'])]
log_info(f'Dropped {row["ipv4"]} because it is invalid CIDR notation')
except ValueError as e:
log_error(f"Invalid CIDR notation {row['ipv4']}: {e}")
data = drop_duplicates(data)
write_csv(data, DB_FILE)
log_happy('Database sorted')
Expand All @@ -66,13 +77,8 @@ def drop_duplicates_in_known(data):
log_info("No non-CIDR data found")
else:
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
not_cidr_data['ipv4'] = not_cidr_data.iloc[:, 0].apply(lambda x: None if ipaddress.ip_address(x) in ip_network else x)
not_cidr_data = not_cidr_data.dropna(subset=[not_cidr_data.columns[0]])
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
if not is_cidr_valid(cidr):
log_warning(f"Invalid CIDR notation {cidr}")
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

Expand Down Expand Up @@ -109,6 +115,14 @@ def sort_known():
data = pd.DataFrame(data, columns=[data[0].split(',')[0]])
log_info(f"Read {len(data)} rows from {file}")

for index, row in data.iterrows():
if '/' in row.iloc[0]:
try:
ipaddress.ip_network(row.iloc[0], strict=False)
except ValueError as e:
log_warning(f"Invalid CIDR notation {row[0]}: {e}")
data = data[~(data[data.columns[0]] == row[0])]
log_info(f'Dropped {row[0]} because it is invalid CIDR notation')
data = drop_duplicates_in_known(data)
data = data.sort_values(by=[data.columns[0]])

Expand Down

0 comments on commit 11cb16e

Please sign in to comment.