
Add Incremental weaving
Kseen715 committed Aug 7, 2024
1 parent fe8d9ce commit 8b6f966
Showing 19 changed files with 128 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -14,7 +14,7 @@ NONE
- facebook.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- instagram.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- meta - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- netflix.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- netflix - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- openai.com
- x.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- youtube.com - from [[x]](https://github.com/touhidurrr/iplist-youtube?tab=readme-ov-file) [[x]](https://www.gstatic.com/ipranges/goog.json) [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
1 change: 1 addition & 0 deletions hashes/README.md.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/chatgpt.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/db.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/discord.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/facebook.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/instagram.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/meta.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/microsoft.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/netflix.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/x.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/youtube.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
File renamed without changes.
65 changes: 64 additions & 1 deletion src/common.py
@@ -2,6 +2,7 @@
import json
import colorama
import datetime
from hashlib import sha256


DB_FILE = 'db.csv'
@@ -120,4 +121,66 @@ def log_error(msg):
msg (str): Error message
"""
if LOG_LEVEL >= LOG_LEVELS['ERROR']:
        print(f'{colorama.Fore.RED}{datetime.datetime.now()} [ERROR] {msg}{colorama.Style.RESET_ALL}')


def hash_file(filename):
    """Hash file
    Args:
        filename (str): Name of the file to hash
    Returns:
        bytes: Hash of the file in bytes
    """
    with open(filename, 'rb') as f:
        return sha256(f.read()).digest()


def hash_str(string):
    """Hash string
    Args:
        string (str): String to hash
    Returns:
        bytes: Hash of the string in bytes
    """
    return sha256(string.encode()).digest()


def save_hash_binary(new_hash_bytes, new_hash_filename):
    """Save hash of binary data to file
    Args:
        new_hash_bytes (bytes): Hash bytes to save
        new_hash_filename (str): Name of the file to save the hash to
    """
    with open(new_hash_filename, 'wb') as f:
        f.write(new_hash_bytes)
    log_info(f'Saved hash to {new_hash_filename}')


def read_file_binary(filename):
    """Read binary data from file
    Args:
        filename (str): Name of the file to read data from
    Returns:
        bytes: Binary data read from file
    """
    with open(filename, 'rb') as f:
        return f.read()


def check_hash_binary(new_hash_bytes, old_hash_filename):
    """Check hash of binary data against a stored hash file
    Args:
        new_hash_bytes (bytes): Newly computed hash bytes
        old_hash_filename (str): Name of the file containing the stored hash
    Returns:
        bool: Whether the hash matches
    """
    return new_hash_bytes == read_file_binary(old_hash_filename)
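
Taken together, these helpers implement the incremental pattern the rest of this commit wires into sort_db, sort_known, and sort_readme: hash the file, compare against the digest stored under hashes/, skip the work when it matches, and store a fresh digest after rewriting. A minimal sketch of that pattern, assuming common.py is importable; the helper name and the process callable are illustrative, not part of the commit:

import os
from common import hash_file, check_hash_binary, save_hash_binary, log_info


def run_if_modified(src_path, process, hash_dir='./hashes'):
    """Run process(src_path) only when the file content has changed."""
    hash_path = os.path.join(hash_dir, os.path.basename(src_path) + '.hash')
    if os.path.exists(hash_path) and check_hash_binary(hash_file(src_path), hash_path):
        log_info(f'{src_path} has not been modified')
        return
    process(src_path)
    # Re-hash after processing so the stored digest matches the rewritten file.
    save_hash_binary(hash_file(src_path), hash_path)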
2 changes: 1 addition & 1 deletion src/make_amnezia.py
@@ -1,8 +1,8 @@
from common import *


def make_amnezia():
log_info('make_amnezia: Starting')

data = read_csv(DB_FILE)
# use ipv4 as hostname
data['hostname'] = data['ipv4']
1 change: 1 addition & 0 deletions src/make_route_bat.py
@@ -30,6 +30,7 @@

def make_route_bat():
log_info('make_route_bat: Starting')

data = read_csv(DB_FILE)
# if ipv4 contains / then it is a masked ip range
masked = data[data['ipv4'].str.contains('/')]
1 change: 1 addition & 0 deletions src/make_wireguard.py
@@ -12,6 +12,7 @@

def make_wireguard():
log_info('make_wireguard: Starting')

data = read_csv(DB_FILE)
file_str = ""
# add header
54 changes: 39 additions & 15 deletions src/sort_db.py
@@ -34,13 +34,24 @@ def drop_duplicates(data):

def sort_db():
    log_info('sort_db: Starting')
    if os.path.exists('./hashes/' + DB_FILE + '.hash'):
        if not check_hash_binary(hash_file(DB_FILE), './hashes/' + DB_FILE + '.hash'):
            log_warning('Database file has been modified')
        else:
            log_info('Database file has not been modified')
            return
    else:
        log_warning('No hash file found for database')
    data = read_csv(DB_FILE)
    data = data.sort_values(by=['hostname', 'ipv4', 'comment'])
    data = data.drop_duplicates(subset=['ipv4'])
    data = drop_duplicates(data)
    write_csv(data, DB_FILE)
    log_happy('Database sorted')
    save_hash_binary(hash_file(DB_FILE), './hashes/' + DB_FILE + '.hash')
    log_info('sort_db: Finished')



def drop_duplicates_in_known(data):
@@ -52,24 +63,27 @@

log_info(f"Initial CIDR data count: {len(cidr_data)}")
log_info(f"Initial non-CIDR data count: {len(not_cidr_data)}")
if len(not_cidr_data) == 0:
log_info("No non-CIDR data found")
else:
cidr_ips = set()
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
cidr_ips.update(str(ip) for ip in ip_network.hosts())
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

cidr_ips = set()
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
cidr_ips.update(str(ip) for ip in ip_network.hosts())
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

not_cidr_data = not_cidr_data[~not_cidr_data.iloc[:, 0].astype(str).isin(cidr_ips)]

not_cidr_data = not_cidr_data[~not_cidr_data.iloc[:, 0].astype(str).isin(cidr_ips)]
dropped_ips = set(original_data.iloc[:, 0].astype(str)) - set(not_cidr_data.iloc[:, 0].astype(str)) - set(cidr_data.iloc[:, 0].astype(str))
for ip in dropped_ips:
log_info(f'Dropped {ip} because it is included in a CIDR range')

dropped_ips = set(original_data.iloc[:, 0].astype(str)) - set(not_cidr_data.iloc[:, 0].astype(str)) - set(cidr_data.iloc[:, 0].astype(str))
for ip in dropped_ips:
log_info(f'Dropped {ip} because it is included in a CIDR range')

data = pd.concat([not_cidr_data, cidr_data], ignore_index=True)
if len(original_data) != len(data):
@@ -86,9 +100,18 @@ def sort_known():
fname = 'fake_name'
for file in onlyfiles:
log_info(f"Processing file: {file}")
if os.path.exists(f'./hashes/{file}.hash'):
if not check_hash_binary(hash_file(f'in/known/{file}'), f'./hashes/{file}.hash'):
log_warning(f'{file} has been modified')
else:
log_info(f'{file} has not been modified')
continue
else:
log_warning(f'No hash file found for {file}')

data = read_txt_lbl(f'in/known/{file}')
# add first line with column name
data = [fname] + data
# data = [fname] + data
# print(data)

# exit(0)
@@ -101,6 +124,7 @@
# drop all line with fname
data = data[data[data.columns[0]] != fname]
write_txt(data.iloc[:, 0].tolist(), f'in/known/{file}')
save_hash_binary(hash_file(f'in/known/{file}'), f'./hashes/{file}.hash')
log_happy(f'{file} sorted')

log_info("sort_known: Finished")
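
For reference, the reworked drop_duplicates_in_known treats any entry containing '/' as a CIDR range, expands valid ranges with ipaddress, and drops plain IPs already covered by a range. A standalone sketch of the same containment idea, using address-in-network membership instead of the commit's hosts() expansion; the function and variable names here are illustrative:

import ipaddress


def drop_covered_ips(entries):
    """Drop plain IPs that fall inside any valid CIDR range in entries."""
    cidrs = [e for e in entries if '/' in e]
    plain = [e for e in entries if '/' not in e]
    networks, valid_cidrs = [], []
    for c in cidrs:
        try:
            networks.append(ipaddress.ip_network(c, strict=False))
            valid_cidrs.append(c)
        except ValueError:
            pass  # invalid CIDR entries are dropped, as in the commit
    kept = [ip for ip in plain
            if not any(ipaddress.ip_address(ip) in net for net in networks)]
    return kept + valid_cidrs

Membership testing avoids materializing every host of a large range (hosts() on a /8 yields roughly 16 million addresses), which is why it is shown here as an alternative.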
10 changes: 10 additions & 0 deletions src/sort_readme.py
@@ -1,11 +1,20 @@
from common import *
import re
import os
# catch <div id="auto-sort-start"/> and <div id="auto-sort-end"/> in README.md
# sort the lines between them alphabetically


def sort_readme():
log_info('sort_readme: Starting')
if os.path.exists('./hashes/README.md.hash'):
if not check_hash_binary(hash_file('README.md'), './hashes/README.md.hash'):
log_warning('README.md has been modified')
else:
log_info('README.md has not been modified')
return
else:
log_warning('No hash file found for README.md')

with open('README.md', 'r') as f:
readme = f.readlines()
@@ -32,6 +41,7 @@ def sort_readme():
f.writelines(sorted_readme)

log_happy('README.md sorted')
save_hash_binary(hash_file('README.md'), './hashes/README.md.hash')
log_info('sort_readme: Finished')


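As the header comment in sort_readme.py notes, only the region of README.md between the <div id="auto-sort-start"/> and <div id="auto-sort-end"/> markers is sorted. A minimal standalone sketch of that core step, with marker strings taken from that comment and an illustrative helper name:

def sort_between_markers(lines,
                         start_marker='<div id="auto-sort-start"/>',
                         end_marker='<div id="auto-sort-end"/>'):
    """Alphabetically sort the lines between the two marker lines."""
    start = next(i for i, line in enumerate(lines) if start_marker in line)
    end = next(i for i, line in enumerate(lines) if end_marker in line)
    return lines[:start + 1] + sorted(lines[start + 1:end]) + lines[end:]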
