Draft: Feat/glom like key searching #44

Open · wants to merge 13 commits into base: main
19 changes: 19 additions & 0 deletions dictdatabase/dataclasses.py
@@ -0,0 +1,19 @@
import dataclasses


@dataclasses.dataclass(frozen=True)
class SearchResult:
start_byte: int
end_byte: int
found: bool


@dataclasses.dataclass(frozen=True)
class Index:
key: str
key_start: int
key_end: int
indent_level: int
indent_with: str
value_hash: str
old_value_end: int
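
Worth noting for the rest of the diff: the found flag on SearchResult replaces the previous `key_end == -1` sentinel convention at the call sites in io_unsafe.py. A minimal sketch of the intended use (offsets invented for illustration):

from dictdatabase.dataclasses import SearchResult

miss = SearchResult(start_byte=-1, end_byte=-1, found=False)
hit = SearchResult(start_byte=42, end_byte=57, found=True)

for result in (miss, hit):
	if not result.found:  # previously: `if key_end == -1:`
		continue
	value_span = (result.start_byte, result.end_byte)  # bytes to parse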
27 changes: 27 additions & 0 deletions dictdatabase/index_manager.py
@@ -0,0 +1,27 @@
import hashlib

from dictdatabase import utils
from dictdatabase.dataclasses import Index


def create_index(all_file_bytes: bytes, key: str, start: int, end: int) -> Index:
	"""
	Take the raw bytes of a JSON file plus a key and the byte span of its
	value, and build an Index describing that key.

	Args:
		all_file_bytes (bytes): The entire file as a byte string.
		key (str): The key of the value we're indexing.
		start (int): Byte position where the value starts in the file.
		end (int): Byte position where the value ends in the file.

	Returns:
		An Index carrying the key, the value's byte span, indentation info,
		the value's SHA-256 hash, and the old value end (here equal to end).
	"""
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
indent_level, indent_with = utils.detect_indentation_in_json_bytes(
all_file_bytes, key_start
)
value_bytes = all_file_bytes[start:end]
value_hash = hashlib.sha256(value_bytes).hexdigest()
return Index(key, start, end, indent_level, indent_with, value_hash, end)
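
A small usage sketch of create_index (assuming the package from this branch is importable; the document and span below are invented for illustration):

import json

from dictdatabase.index_manager import create_index

raw = json.dumps({"users": {"a": 1}, "posts": [1, 2]}).encode()

# Locate the value span of "users" by hand for this toy document;
# the real callers get these offsets from the searching module.
value = b'{"a": 1}'
start = raw.find(value)
end = start + len(value)

idx = create_index(raw, "users", start, end)
print(idx.key, idx.key_start, idx.key_end, idx.value_hash[:8])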
126 changes: 67 additions & 59 deletions dictdatabase/indexing.py
@@ -1,6 +1,10 @@
import orjson
import os

import orjson

from . import config
from .dataclasses import Index


# Problem: Multiple read processes will concurrently read and write the same file
# In some cases this will result in an empty read error, that's why the try-except exists
@@ -22,61 +26,65 @@


class Indexer:
"""
The Indexer takes the name of a database file, and tries to load the .index file
of the corresponding database file.

The name of the index file is the name of the database file, with the extension
.index and all "/" replaced with "___"

The content of the index file is a json object, where the keys are keys inside
the database json file, and the values are lists of 5 elements:
- start_index: The index of the first byte of the value of the key in the database file
- end_index: The index of the last byte of the value of the key in the database file
- indent_level: The indent level of the key in the database file
- indent_with: The indent string used.
- value_hash: The hash of the value bytes
"""

__slots__ = ("data", "path")

def __init__(self, db_name: str):
# Make path of index file
db_name = db_name.replace("/", "___")
self.path = os.path.join(config.storage_directory, ".ddb", f"{db_name}.index")

os.makedirs(os.path.dirname(self.path), exist_ok=True)
if not os.path.exists(self.path):
self.data = {}
return

try:
with open(self.path, "rb") as f:
self.data = orjson.loads(f.read())
except orjson.JSONDecodeError:
self.data = {}


def get(self, key):
"""
Returns a list of 5 elements for a key if it exists, otherwise None
Elements:[start_index, end_index, indent_level, indent_with, value_hash]
"""
return self.data.get(key, None)


def write(self, key, start_index, end_index, indent_level, indent_with, value_hash, old_value_end):
"""
Write index information for a key to the index file
"""

if self.data.get(key, None) is not None:
delta = end_index - old_value_end
for entry in self.data.values():
if entry[0] > old_value_end:
entry[0] += delta
entry[1] += delta

self.data[key] = [start_index, end_index, indent_level, indent_with, value_hash]
with open(self.path, "wb") as f:
f.write(orjson.dumps(self.data))
Comment on lines -25 to -82

Owner Author:
@UmbrellaMalware let's stick with tabs so the diff looks clean.

Collaborator:
PEP 8 says spaces are preferred. This can become a problem if someone else wants to propose changes to the project.

Owner Author:
Yes, I know. I personally prefer tabs and think they are faster to work with. If this project grows, I agree it should adhere to most Python community standards, but I'd like to keep this merge clean; the indentation and formatting changes should be part of a separate issue/MR.
"""
The Indexer takes the name of a database file, and tries to load the .index file
of the corresponding database file.

The name of the index file is the name of the database file, with the extension
.index and all "/" replaced with "___"

The content of the index file is a json object, where the keys are keys inside
the database json file, and the values are lists of 5 elements:
- start_index: The index of the first byte of the value of the key in the database file
- end_index: The index of the last byte of the value of the key in the database file
- indent_level: The indent level of the key in the database file
- indent_with: The indent string used.
- value_hash: The hash of the value bytes
"""

__slots__ = ("data", "path")

def __init__(self, db_name: str):
# Make path of index file
db_name = db_name.replace("/", "___")
self.path = os.path.join(config.storage_directory, ".ddb", f"{db_name}.index")

os.makedirs(os.path.dirname(self.path), exist_ok=True)
if not os.path.exists(self.path):
self.data = {}
return

try:
with open(self.path, "rb") as f:
self.data = orjson.loads(f.read())
except orjson.JSONDecodeError:
self.data = {}

def get(self, key):
"""
Returns a list of 5 elements for a key if it exists, otherwise None
Elements: [start_index, end_index, indent_level, indent_with, value_hash]
"""
return self.data.get(key, None)

def write(self, index: Index):
"""
Write index information for a key to the index file
"""

if self.data.get(index.key, None) is not None:
delta = index.key_end - index.old_value_end
for entry in self.data.values():
if entry[0] > index.old_value_end:
entry[0] += delta
entry[1] += delta

self.data[index.key] = [
index.key_start,
index.key_end,
index.indent_level,
index.indent_with,
index.value_hash,
]
with open(self.path, "wb") as f:
f.write(orjson.dumps(self.data))
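
The offset-shifting branch of write is the subtle part: when a value grows or shrinks, every index entry that starts after the old value end must slide by the size difference, or its stored byte range goes stale. A standalone sketch of just that logic (not the class itself), with an invented three-entry index:

def shift_entries(data: dict, old_value_end: int, delta: int) -> None:
	# Entries are [start, end, indent_level, indent_with, value_hash].
	# Anything positioned after the resized value moves by delta bytes.
	for entry in data.values():
		if entry[0] > old_value_end:
			entry[0] += delta
			entry[1] += delta


index = {
	"a": [10, 20, 1, "\t", "aa..."],
	"b": [25, 40, 1, "\t", "bb..."],
	"c": [45, 60, 1, "\t", "cc..."],
}
# "a" was rewritten and grew by 5 bytes (old end 20, new end 25):
shift_entries(index, old_value_end=20, delta=5)
assert index["b"][:2] == [30, 45] and index["c"][:2] == [50, 65]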
93 changes: 60 additions & 33 deletions dictdatabase/io_unsafe.py
@@ -1,10 +1,20 @@
from __future__ import annotations
from typing import Tuple

import hashlib
import json
from dataclasses import dataclass
from typing import Tuple

import orjson
import json
import hashlib
from . import config, utils, byte_codes, indexing, io_bytes

from . import byte_codes
from . import config
from . import indexing
from . import io_bytes
from . import searching
from . import utils
from .dataclasses import Index
from .index_manager import create_index


@dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9
@@ -46,7 +56,9 @@ def read(db_name: str) -> dict:
########################################################################################


def try_read_bytes_using_indexer(indexer: indexing.Indexer, db_name: str, key: str) -> bytes | None:
def try_read_bytes_using_indexer(
indexer: indexing.Indexer, db_name: str, key: str
) -> bytes | None:
"""
Check if the key info is saved in the file's index file.
If it is and the value has not changed, return the value bytes.
@@ -79,21 +91,12 @@ def partial_read_only(db_name: str, key: str) -> dict | None:

# Not found in index file, search for key in the entire file
all_file_bytes = io_bytes.read(db_name)
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)

if key_end == -1:
start, end, found = searching.search_value_position_in_db(all_file_bytes, key)
if not found:
return None

# Key found, now determine the bounding byte indices of the value
start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0)
end = utils.seek_index_through_value_bytes(all_file_bytes, start)

indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start)
value_bytes = all_file_bytes[start:end]
value_hash = hashlib.sha256(value_bytes).hexdigest()

# Write key info to index file
indexer.write(key, start, end, indent_level, indent_with, value_hash, end)
indexer.write(create_index(all_file_bytes, key, start, end))
return orjson.loads(value_bytes)
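
The new call sites above lean on a searching module that this diff does not display in full; from usage, its surface looks roughly like this (signatures inferred from the callers, so treat them as assumptions rather than the module's actual API):

from typing import Tuple

from dictdatabase.dataclasses import SearchResult


def search_value_position_in_db(all_file_bytes: bytes, key: str) -> Tuple[int, int, bool]:
	"""Locate the value bytes of `key`; returns (start, end, found)."""
	...


def search_key_position_in_db(all_file_bytes: bytes, key: str) -> SearchResult:
	"""Locate `key` itself; a SearchResult with found=False signals a miss."""
	...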


@@ -130,7 +133,9 @@ def write(db_name: str, data: dict):
################################################################################


def try_get_parial_file_handle_by_index(indexer: indexing.Indexer, db_name, key) -> Tuple[PartialFileHandle | None, bytes | None]:
def try_get_parial_file_handle_by_index(
indexer: indexing.Indexer, db_name, key
) -> Tuple[PartialFileHandle | None, bytes | None]:
"""
Try to get a partial file handle by using the key entry in the index file.

@@ -151,7 +156,9 @@ def try_get_parial_file_handle_by_index(indexer: indexing.Indexer, db_name, key)
if value_hash != hashlib.sha256(value_bytes).hexdigest():
return None, all_file_bytes
value_data = orjson.loads(value_bytes)
partial_dict = PartialDict(all_file_bytes[:start], key, value_data, start, end, all_file_bytes[end:])
partial_dict = PartialDict(
all_file_bytes[:start], key, value_data, start, end, all_file_bytes[end:]
)

# If compression is disabled, only the value and suffix have to be read
else:
@@ -163,9 +170,14 @@ def try_get_parial_file_handle_by_index(indexer: indexing.Indexer, db_name, key)
prefix_bytes = io_bytes.read(db_name, end=start)
return None, prefix_bytes + value_and_suffix_bytes
value_data = orjson.loads(value_bytes)
partial_dict = PartialDict(None, key, value_data, start, end, value_and_suffix_bytes[value_length:])
partial_dict = PartialDict(
None, key, value_data, start, end, value_and_suffix_bytes[value_length:]
)

return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer), None
return (
PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer),
None,
)


def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
Expand All @@ -180,25 +192,33 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:

# Search for key in the index file
indexer = indexing.Indexer(db_name)
partial_handle, all_file_bytes = try_get_parial_file_handle_by_index(indexer, db_name, key)
partial_handle, all_file_bytes = try_get_parial_file_handle_by_index(
indexer, db_name, key
)
if partial_handle is not None:
return partial_handle

# Not found in index file, search for key in the entire file
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
position = searching.search_key_position_in_db(all_file_bytes, key)

if key_end == -1:
raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"")
if not position.found:
raise KeyError(f'Key "{key}" not found in db "{db_name}"')

# Key found, now determine the bounding byte indices of the value
start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0)
start = position.end_byte + (
1 if all_file_bytes[position.end_byte] == byte_codes.SPACE else 0
)
end = utils.seek_index_through_value_bytes(all_file_bytes, start)

indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start)
indent_level, indent_with = utils.detect_indentation_in_json_bytes(
all_file_bytes, position.start_byte
)

partial_value = orjson.loads(all_file_bytes[start:end])
prefix_bytes = all_file_bytes[:start] if config.use_compression else None
partial_dict = PartialDict(prefix_bytes, key, partial_value, start, end, all_file_bytes[end:])
partial_dict = PartialDict(
prefix_bytes, key, partial_value, start, end, all_file_bytes[end:]
)
return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer)


@@ -216,19 +236,26 @@ def partial_write(pf: PartialFileHandle):
partial_bytes = partial_bytes.replace(replace_this, replace_with)

# Write key info to index file
pf.indexer.write(
index = Index(
key=pf.partial_dict.key,
start_index=pf.partial_dict.value_start,
end_index=pf.partial_dict.value_start + len(partial_bytes),
key_start=pf.partial_dict.value_start,
key_end=pf.partial_dict.value_start + len(partial_bytes),
indent_level=pf.indent_level,
indent_with=pf.indent_with,
value_hash=hashlib.sha256(partial_bytes).hexdigest(),
old_value_end=pf.partial_dict.value_end,
)
pf.indexer.write(index)

if pf.partial_dict.prefix is None:
# Prefix could not be determined due to compression, so write the entire file
io_bytes.write(pf.db_name, partial_bytes + pf.partial_dict.suffix, start=pf.partial_dict.value_start)
io_bytes.write(
pf.db_name,
partial_bytes + pf.partial_dict.suffix,
start=pf.partial_dict.value_start,
)
else:
# Prefix was determined, so only write the changed part and the suffix
io_bytes.write(pf.db_name, pf.partial_dict.prefix + partial_bytes + pf.partial_dict.suffix)
io_bytes.write(
pf.db_name, pf.partial_dict.prefix + partial_bytes + pf.partial_dict.suffix
)
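
Taken together, the indexed read path only trusts an entry if the hash of the bytes it points at still matches; otherwise the caller falls back to a full search. A condensed, self-contained sketch mirroring try_get_parial_file_handle_by_index's hash check (entry layout as stored by indexing.py; the toy file and offsets are invented):

import hashlib

import orjson


def read_via_index(all_file_bytes: bytes, entry: list):
	# entry = [start, end, indent_level, indent_with, value_hash]
	start, end, _, _, value_hash = entry
	value_bytes = all_file_bytes[start:end]
	if hashlib.sha256(value_bytes).hexdigest() != value_hash:
		return None  # stale entry: the file changed under the index
	return orjson.loads(value_bytes)


raw = b'{"users": {"a": 1}}'
entry = [10, 18, 1, "\t", hashlib.sha256(b'{"a": 1}').hexdigest()]
print(read_via_index(raw, entry))  # {'a': 1}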