
Add Incremental weaving
Kseen715 committed Aug 7, 2024
1 parent fe8d9ce commit 8b6f966
Showing 19 changed files with 128 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -14,7 +14,7 @@ NONE
- facebook.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- instagram.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- meta - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- netflix.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- netflix - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- openai.com
- x.com - from [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
- youtube.com - from [[x]](https://github.com/touhidurrr/iplist-youtube?tab=readme-ov-file) [[x]](https://www.gstatic.com/ipranges/goog.json) [[x]](https://rockblack.su/vpn/dopolnitelno/diapazon-ip-adresov)
1 change: 1 addition & 0 deletions hashes/README.md.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/chatgpt.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/db.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/discord.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/facebook.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/instagram.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/meta.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/microsoft.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/netflix.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/x.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
1 change: 1 addition & 0 deletions hashes/youtube.com.csv.hash
@@ -0,0 +1 @@
(binary SHA-256 digest, 32 bytes)
File renamed without changes.
65 changes: 64 additions & 1 deletion src/common.py
@@ -2,6 +2,7 @@
import json
import colorama
import datetime
from hashlib import sha256


DB_FILE = 'db.csv'
@@ -120,4 +121,66 @@ def log_error(msg):
msg (str): Error message
"""
if LOG_LEVEL >= LOG_LEVELS['ERROR']:
        print(f'{colorama.Fore.RED}{datetime.datetime.now()} [ERROR] {msg}{colorama.Style.RESET_ALL}')


def hash_file(filename):
    """Hash file
    Args:
        filename (str): Name of the file to hash
    Returns:
        bytes: Hash of the file in bytes
    """
    with open(filename, 'rb') as f:
        return sha256(f.read()).digest()


def hash_str(string):
    """Hash string
    Args:
        string (str): String to hash
    Returns:
        bytes: Hash of the string in bytes
    """
    return sha256(string.encode()).digest()


def save_hash_binary(new_hash_bytes, new_hash_filename):
    """Save hash of binary data to file
    Args:
        new_hash_bytes (bytes): Hash bytes to save
        new_hash_filename (str): Name of the file to save the hash to
    """
    with open(new_hash_filename, 'wb') as f:
        f.write(new_hash_bytes)
    log_info(f'Saved hash to {new_hash_filename}')


def read_file_binary(filename):
    """Read binary data from file
    Args:
        filename (str): Name of the file to read data from
    Returns:
        bytes: Binary data read from file
    """
    with open(filename, 'rb') as f:
        return f.read()


def check_hash_binary(new_hash_bytes, old_hash_filename):
    """Check hash of binary data against a stored hash file
    Args:
        new_hash_bytes (bytes): Newly computed hash bytes
        old_hash_filename (str): Name of the file containing the stored hash
    Returns:
        bool: Whether the hash matches
    """
    return new_hash_bytes == read_file_binary(old_hash_filename)
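
Taken together, these helpers implement the incremental pattern the rest of this commit wires into sort_db, sort_known, and sort_readme: hash the file, compare against the digest stored under hashes/, skip the work when it matches, and store a fresh digest after rewriting. A minimal sketch of that pattern, assuming common.py is importable; the helper name and the process callable are illustrative, not part of the commit:

import os
from common import hash_file, check_hash_binary, save_hash_binary, log_info


def run_if_modified(src_path, process, hash_dir='./hashes'):
    """Run process(src_path) only when the file content has changed."""
    hash_path = os.path.join(hash_dir, os.path.basename(src_path) + '.hash')
    if os.path.exists(hash_path) and check_hash_binary(hash_file(src_path), hash_path):
        log_info(f'{src_path} has not been modified')
        return
    process(src_path)
    # Re-hash after processing so the stored digest matches the rewritten file.
    save_hash_binary(hash_file(src_path), hash_path)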
2 changes: 1 addition & 1 deletion src/make_amnezia.py
@@ -1,8 +1,8 @@
from common import *


def make_amnezia():
log_info('make_amnezia: Starting')

data = read_csv(DB_FILE)
# use ipv4 as hostname
data['hostname'] = data['ipv4']
1 change: 1 addition & 0 deletions src/make_route_bat.py
@@ -30,6 +30,7 @@

def make_route_bat():
log_info('make_route_bat: Starting')

data = read_csv(DB_FILE)
# if ipv4 contains / then it is a masked ip range
masked = data[data['ipv4'].str.contains('/')]
1 change: 1 addition & 0 deletions src/make_wireguard.py
@@ -12,6 +12,7 @@

def make_wireguard():
log_info('make_wireguard: Starting')

data = read_csv(DB_FILE)
file_str = ""
# add header
54 changes: 39 additions & 15 deletions src/sort_db.py
@@ -34,13 +34,24 @@ def drop_duplicates(data):

def sort_db():
    log_info('sort_db: Starting')
    if os.path.exists('./hashes/' + DB_FILE + '.hash'):
        if not check_hash_binary(hash_file(DB_FILE), './hashes/' + DB_FILE + '.hash'):
            log_warning('Database file has been modified')
        else:
            log_info('Database file has not been modified')
            return
    else:
        log_warning('No hash file found for database')
    data = read_csv(DB_FILE)
    data = data.sort_values(by=['hostname', 'ipv4', 'comment'])
    data = data.drop_duplicates(subset=['ipv4'])
    data = drop_duplicates(data)
    write_csv(data, DB_FILE)
    log_happy('Database sorted')
    save_hash_binary(hash_file(DB_FILE), './hashes/' + DB_FILE + '.hash')
    log_info('sort_db: Finished')



def drop_duplicates_in_known(data):
@@ -52,24 +63,27 @@

log_info(f"Initial CIDR data count: {len(cidr_data)}")
log_info(f"Initial non-CIDR data count: {len(not_cidr_data)}")
if len(not_cidr_data) == 0:
log_info("No non-CIDR data found")
else:
cidr_ips = set()
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
cidr_ips.update(str(ip) for ip in ip_network.hosts())
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

cidr_ips = set()
for cidr in cidr_data.iloc[:, 0]:
try:
ip_network = ipaddress.ip_network(cidr, strict=False)
cidr_ips.update(str(ip) for ip in ip_network.hosts())
except ValueError as e:
log_warning(f"Invalid CIDR notation {cidr}: {e}")
# remove invalid CIDR
cidr_data = cidr_data[~(cidr_data.iloc[:, 0].astype(str) == cidr)]
log_info(f'Dropped {cidr} because it is invalid CIDR notation')

not_cidr_data = not_cidr_data[~not_cidr_data.iloc[:, 0].astype(str).isin(cidr_ips)]

not_cidr_data = not_cidr_data[~not_cidr_data.iloc[:, 0].astype(str).isin(cidr_ips)]
dropped_ips = set(original_data.iloc[:, 0].astype(str)) - set(not_cidr_data.iloc[:, 0].astype(str)) - set(cidr_data.iloc[:, 0].astype(str))
for ip in dropped_ips:
log_info(f'Dropped {ip} because it is included in a CIDR range')

dropped_ips = set(original_data.iloc[:, 0].astype(str)) - set(not_cidr_data.iloc[:, 0].astype(str)) - set(cidr_data.iloc[:, 0].astype(str))
for ip in dropped_ips:
log_info(f'Dropped {ip} because it is included in a CIDR range')

data = pd.concat([not_cidr_data, cidr_data], ignore_index=True)
if len(original_data) != len(data):
@@ -86,9 +100,18 @@ def sort_known():
fname = 'fake_name'
for file in onlyfiles:
log_info(f"Processing file: {file}")
if os.path.exists(f'./hashes/{file}.hash'):
if not check_hash_binary(hash_file(f'in/known/{file}'), f'./hashes/{file}.hash'):
log_warning(f'{file} has been modified')
else:
log_info(f'{file} has not been modified')
continue
else:
log_warning(f'No hash file found for {file}')

data = read_txt_lbl(f'in/known/{file}')
# add first line with column name
data = [fname] + data
# data = [fname] + data
# print(data)

# exit(0)
@@ -101,6 +124,7 @@
# drop all line with fname
data = data[data[data.columns[0]] != fname]
write_txt(data.iloc[:, 0].tolist(), f'in/known/{file}')
save_hash_binary(hash_file(f'in/known/{file}'), f'./hashes/{file}.hash')
log_happy(f'{file} sorted')

log_info("sort_known: Finished")
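
For reference, the reworked drop_duplicates_in_known treats any entry containing '/' as a CIDR range, expands valid ranges with ipaddress, and drops plain IPs already covered by a range. A standalone sketch of the same containment idea, using address-in-network membership instead of the commit's hosts() expansion; the function and variable names here are illustrative:

import ipaddress


def drop_covered_ips(entries):
    """Drop plain IPs that fall inside any valid CIDR range in entries."""
    cidrs = [e for e in entries if '/' in e]
    plain = [e for e in entries if '/' not in e]
    networks, valid_cidrs = [], []
    for c in cidrs:
        try:
            networks.append(ipaddress.ip_network(c, strict=False))
            valid_cidrs.append(c)
        except ValueError:
            pass  # invalid CIDR entries are dropped, as in the commit
    kept = [ip for ip in plain
            if not any(ipaddress.ip_address(ip) in net for net in networks)]
    return kept + valid_cidrs

Membership testing avoids materializing every host of a large range (hosts() on a /8 yields roughly 16 million addresses), which is why it is shown here as an alternative.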
10 changes: 10 additions & 0 deletions src/sort_readme.py
@@ -1,11 +1,20 @@
from common import *
import re
import os
# catch <div id="auto-sort-start"/> and <div id="auto-sort-end"/> in README.md
# sort the lines between them alphabetically


def sort_readme():
log_info('sort_readme: Starting')
if os.path.exists('./hashes/README.md.hash'):
if not check_hash_binary(hash_file('README.md'), './hashes/README.md.hash'):
log_warning('README.md has been modified')
else:
log_info('README.md has not been modified')
return
else:
log_warning('No hash file found for README.md')

with open('README.md', 'r') as f:
readme = f.readlines()
@@ -32,6 +41,7 @@ def sort_readme():
f.writelines(sorted_readme)

log_happy('README.md sorted')
save_hash_binary(hash_file('README.md'), './hashes/README.md.hash')
log_info('sort_readme: Finished')


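As the header comment in sort_readme.py notes, only the region of README.md between the <div id="auto-sort-start"/> and <div id="auto-sort-end"/> markers is sorted. A minimal standalone sketch of that core step, with marker strings taken from that comment and an illustrative helper name:

def sort_between_markers(lines,
                         start_marker='<div id="auto-sort-start"/>',
                         end_marker='<div id="auto-sort-end"/>'):
    """Alphabetically sort the lines between the two marker lines."""
    start = next(i for i, line in enumerate(lines) if start_marker in line)
    end = next(i for i, line in enumerate(lines) if end_marker in line)
    return lines[:start + 1] + sorted(lines[start + 1:end]) + lines[end:]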
