From ded6e9b133c3308ce87f9404f51ccd8ce15af5af Mon Sep 17 00:00:00 2001
From: Allison Thackston <allison@allisonthackston.com>
Date: Mon, 30 Oct 2023 15:00:33 -0700
Subject: [PATCH] Update semiliterate for better testing and add more tests.
 (#606)

---
 mkdocs_simple_plugin/semiliterate.py | 146 ++++---
 tests/test_semiliterate.py           | 582 +++++++++++++++++++++------
 2 files changed, 536 insertions(+), 192 deletions(-)

diff --git a/mkdocs_simple_plugin/semiliterate.py b/mkdocs_simple_plugin/semiliterate.py
index c3a9048e..aa915e18 100644
--- a/mkdocs_simple_plugin/semiliterate.py
+++ b/mkdocs_simple_plugin/semiliterate.py
@@ -1,4 +1,5 @@
 """Semiliterate module handles document extraction from source files."""
+from io import TextIOWrapper
 import os
 import re
 
@@ -7,14 +8,7 @@
 from mkdocs import utils
 
 
-def get_line(line: str) -> str:
-    """Returns line with EOL."""
-    if not line:
-        return None
-    return line if line.endswith("\n") else line + '\n'
-
-
-def get_match(pattern: re.Pattern, line: str) -> re.Match:
+def _get_match(pattern: re.Pattern, line: str) -> re.Match:
     """Returns the match for the given pattern."""
     if not pattern:
         return None
@@ -149,15 +143,15 @@ def setup(self, line: str) -> None:
         """Process input parameters."""
         setup_inline = InlineParams()
 
-        file_match = get_match(setup_inline.filename_pattern, line)
+        file_match = _get_match(setup_inline.filename_pattern, line)
         if file_match and file_match.lastindex:
             setup_inline.filename = file_match[file_match.lastindex]
 
-        trim_match = get_match(setup_inline.trim_pattern, line)
+        trim_match = _get_match(setup_inline.trim_pattern, line)
         if trim_match and trim_match.lastindex:
             setup_inline.trim = int(trim_match[trim_match.lastindex])
 
-        content_match = get_match(setup_inline.content_pattern, line)
+        content_match = _get_match(setup_inline.content_pattern, line)
         if content_match and content_match.lastindex:
             regex_pattern = content_match[content_match.lastindex]
             setup_inline.content = re.compile(regex_pattern)
@@ -167,7 +161,7 @@ def setup(self, line: str) -> None:
         #     1. default from extraction pattern settings
         #     2. default from inline params
         self.stop = self._stop_default
-        stop_match = get_match(setup_inline.stop_pattern, line)
+        stop_match = _get_match(setup_inline.stop_pattern, line)
         if stop_match and stop_match.lastindex:
             regex_pattern = stop_match[stop_match.lastindex]
             self.stop = re.compile(regex_pattern)
@@ -185,7 +179,7 @@ def replace_line(self, line: str) -> str:
             line = line[self.inline.trim:]
         # Process inline content regex
         if self.inline.content:
-            match_object = get_match(self.inline.content, line)
+            match_object = _get_match(self.inline.content, line)
             if match_object.lastindex:
                 return match_object[match_object.lastindex]
         # Preform replace operations
@@ -227,126 +221,132 @@ def __str__(self) -> str:
         return os.path.join(self.file_directory, self.file_name)
 
     def write(self, arg: str) -> None:
-        """Create and write the file, only if not empty."""
-        if not arg:
+        """Write a line to the file, creating it if needed; no-op on None."""
+        if arg is None:
             return
         if self.file_object is None:
             filename = os.path.join(self.file_directory, self.file_name)
             os.makedirs(self.file_directory, exist_ok=True)
             self.file_object = open(filename, 'w+')
-        self.file_object.write(arg)
+
+        def get_line(line: str) -> str:
+            """Returns line with EOL."""
+            return line if line.endswith("\n") else line + '\n'
+
+        self.file_object.write(get_line(arg))
 
     def close(self) -> str:
         """Finish the file."""
         if self.file_object is not None:
-            file = os.path.join(self.file_directory, self.file_name)
-            utils.log.debug("        ... extracted %s", file)
+            file_path = os.path.join(self.file_directory, self.file_name)
+            utils.log.debug("        ... extracted %s", file_path)
             self.file_object.close()
             self.file_object = None
-            return file
+            return file_path
         return None
 
 
 class StreamExtract:
-    """Extract documentation portions of files to an output stream."""
+    """Extract files to an output stream.
+
+    Optionally filter using a list of ExtractionPatterns.
+    """
 
     def __init__(
             self,
-            input_stream: LazyFile,
+            input_stream: TextIOWrapper,
             output_stream: LazyFile,
             terminate: re.Pattern = None,
             patterns: ExtractionPattern = None,
             **kwargs):
         """Initialize StreamExtract with input and output streams."""
         self.input_stream = input_stream
-        self.default_stream = output_stream
         self.output_stream = output_stream
         self.terminate = terminate
         self.patterns = patterns
-        self.wrote_something = False
-        self.output_files = []
-        self.streams = {
+
+        self._default_stream = output_stream
+        self._output_files = []
+        self._streams = {
             output_stream.file_name: output_stream
         }
 
-    def transcribe(self, text: str) -> None:
-        """Write some text and record if something was written."""
-        self.output_stream.write(text)
-        if text:
-            self.wrote_something = True
-
-    def try_extract_match(
+    def _try_extract_match(
             self,
             match_object: re.Match,
             emit_last: bool = True) -> bool:
-        """Extract match into output.
+        """Write the match's last capture group, if any, to the output.
 
-        If _match_object_ is not false-y, returns true.
-        If extract flag is true, emits the last group of the match if any.
+        Returns:
+            True iff match_object exists.
         """
         if not match_object:
             return False
         if match_object.lastindex and emit_last:
-            self.transcribe(get_line(match_object[match_object.lastindex]))
+            self.output_stream.write(match_object[match_object.lastindex])
         return True
 
     def close(self) -> list:
-        """Returns true if something was written"""
+        """Close the file and return a list of filenames written to."""
         file = self.output_stream.close()
-        if file and self.wrote_something:
-            self.output_files.append(file)
-        return self.output_files
+        if file:
+            self._output_files.append(file)
+        return self._output_files
 
-    def set_output_file(self, filename: str) -> None:
-        """Set output stream from filename."""
+    def set_output_file(self, filename: str) -> LazyFile:
+        """Set the current output stream from filename and return the stream."""
         output_stream = self.output_stream
         if filename:
             # If we've opened this file before, re-use its stream.
-            if filename in self.streams:
-                return self.set_output_stream(self.streams[filename])
+            if filename in self._streams:
+                return self.set_output_stream(self._streams[filename])
             # Otherwise, make a new one and save it to the list.
             output_stream = LazyFile(
                 self.output_stream.file_directory, filename)
-            self.streams[filename] = output_stream
-        self.set_output_stream(output_stream)
+            self._streams[filename] = output_stream
+        return self.set_output_stream(output_stream)
 
-    def set_output_stream(self, stream: LazyFile) -> None:
-        """Set the output stream."""
+    def set_output_stream(self, stream: LazyFile) -> LazyFile:
+        """Set the current output stream and return the stream."""
         if self.output_stream != stream:
             self.close()
             self.output_stream = stream
+        return self.output_stream
 
     def extract(self, **kwargs) -> list:
         """Extract from file with semiliterate configuration.
 
-        Invoke this method to perform the extraction. Returns true if
-        any text is actually extracted, false otherwise.
+        Invoke this method to perform the extraction.
+
+        Returns:
+            A list of files extracted.
         """
         active_pattern = None if self.patterns else ExtractionPattern()
-        for pattern in self.patterns:
+        patterns = self.patterns if self.patterns else []
+        for pattern in patterns:
             if not pattern.start:
                 active_pattern = pattern
 
         for line in self.input_stream:
             # Check terminate, regardless of state:
-            if self.try_extract_match(
-                    get_match(self.terminate, line), active_pattern):
+            if self._try_extract_match(
+                    _get_match(self.terminate, line), active_pattern):
                 return self.close()
             # Change state if flagged to do so:
             if active_pattern is None:
-                for pattern in self.patterns:
-                    start = get_match(pattern.start, line)
+                for pattern in patterns:
+                    start = _get_match(pattern.start, line)
                     if start:
                         active_pattern = pattern
                         active_pattern.setup(line)
                         self.set_output_file(active_pattern.get_filename())
-                        self.try_extract_match(start)
+                        self._try_extract_match(start)
                         break
                 continue
             # We are extracting. See if we should stop:
-            if self.try_extract_match(get_match(active_pattern.stop, line)):
+            if self._try_extract_match(_get_match(active_pattern.stop, line)):
                 active_pattern = None
-                self.set_output_stream(self.default_stream)
+                self.set_output_stream(self._default_stream)
                 continue
             # Extract all other lines in the normal way:
             self.extract_line(line, active_pattern)
@@ -354,8 +354,8 @@ def extract(self, **kwargs) -> list:
 
     def extract_line(self, line: str, extraction_pattern: re.Pattern) -> None:
         """Copy line to the output stream, applying specified replacements."""
-        line = get_line(extraction_pattern.replace_line(line))
-        self.transcribe(line)
+        line = extraction_pattern.replace_line(line)
+        self.output_stream.write(line)
 
 
 class Semiliterate:
@@ -410,18 +410,25 @@ def __init__(
         self.file_filter = re.compile(pattern)
         self.destination = destination
         self.terminate = (terminate is not None) and re.compile(terminate)
-        self.patterns = []
+        self.extractions = []
         if not extract:
             extract = []
         if isinstance(extract, dict):
             # if there is only one extraction pattern, allow it to be a single
             # dict entry
             extract = [extract]
-        for pattern in extract:
-            self.patterns.append(ExtractionPattern(**pattern))
+        for extract_params in extract:
+            self.extractions.append(ExtractionPattern(**extract_params))
 
     def filename_match(self, name: str) -> str:
-        """Get the filename for the match, otherwise return None."""
+        """Get the filename for the match, otherwise return None.
+
+        Args:
+            name (str): The name to match with the pattern filter
+
+        Returns:
+            The output filename for 'name' or None
+        """
         name_match = self.file_filter.search(name)
         if name_match:
             new_name = os.path.splitext(name)[0] + '.md'
@@ -438,7 +445,12 @@ def try_extraction(
             **kwargs) -> list:
         """Try to extract documentation from file with name.
 
-        Returns True if extraction was successful.
+        Args:
+            from_directory (str): The source directory
+            from_file (str): The source filename within directory
+            destination_directory (str): The destination directory
+
+        Returns a list of extracted files.
         """
         to_file = self.filename_match(from_file)
         if not to_file:
@@ -452,11 +464,11 @@ def try_extraction(
                     input_stream=original_file,
                     output_stream=LazyFile(destination_directory, to_file),
                     terminate=self.terminate,
-                    patterns=self.patterns,
+                    patterns=self.extractions,
                     **kwargs)
                 return extraction.extract()
         except (UnicodeDecodeError) as error:
-            utils.log.info("mkdocs-simple-plugin: Skipped  %s", from_file_path)
+            utils.log.debug("mkdocs-simple-plugin: Skipped  %s", from_file_path)
             utils.log.debug(
                 "mkdocs-simple-plugin: Error details: %s", str(error))
         except (OSError, IOError) as error:
diff --git a/tests/test_semiliterate.py b/tests/test_semiliterate.py
index d1adc0b1..16fc09f4 100755
--- a/tests/test_semiliterate.py
+++ b/tests/test_semiliterate.py
@@ -1,11 +1,43 @@
 #!/usr/bin/env python
 """Test mkdocs_simple_plugin.semiliterate"""
 import unittest
-from unittest.mock import patch, mock_open, MagicMock
+from unittest.mock import MagicMock
 import os
 import re
+from io import TextIOWrapper
 
-from mkdocs_simple_plugin import semiliterate
+from pyfakefs import fake_filesystem_unittest
+
+from mkdocs_simple_plugin.semiliterate import (
+    ExtractionPattern,
+    LazyFile,
+    Semiliterate,
+    StreamExtract,
+)
+
+
+class FakeFsTestCase(fake_filesystem_unittest.TestCase):
+    """Custom common helper test functions."""
+
+    # pylint: disable=invalid-name
+    def assertContentsIn(self, path, contents):
+        """Assert that a file path contains contents"""
+        # Read the content of the fake file
+        with open(path, "r") as file:
+            file_content = file.read()
+
+        # Assert that the file contains a specific string
+        self.assertIn(contents, file_content)
+
+    # pylint: disable=invalid-name
+    def assertContentsEqual(self, path, expected_contents):
+        """Assert that the file matches the expected contents"""
+        with open(path, "r") as file:
+            file_contents = file.read().splitlines()
+        self.assertEqual(
+            file_contents,
+            expected_contents,
+            f"File at {path} does not contain the expected contents.")
 
 
 class TestExtractionPattern(unittest.TestCase):
@@ -13,7 +45,7 @@ class TestExtractionPattern(unittest.TestCase):
 
     def test_default(self):
         """Test the default configuration without any additional options."""
-        pattern = semiliterate.ExtractionPattern()
+        pattern = ExtractionPattern()
         # replace_line should just return the line
         self.assertEqual(pattern.replace_line("/** md "), "/** md ")
         self.assertEqual(pattern.replace_line("## Hello"), "## Hello")
@@ -27,44 +59,37 @@ def test_default(self):
 
     def test_setup_filename(self):
         """Test in-line setup for filename."""
-        pattern = semiliterate.ExtractionPattern()
-
+        pattern = ExtractionPattern()
         # Set filename
         pattern.setup("//md file=new_name.snippet")
         self.assertEqual(pattern.get_filename(), "new_name.snippet")
 
     def test_setup_trim(self):
         """Test in-line setup for trimming front."""
-        pattern = semiliterate.ExtractionPattern()
-
+        pattern = ExtractionPattern()
         # Set trim
         pattern.setup("//md trim=2")
-        line = "1234"
-        self.assertEqual(pattern.replace_line(line), "34")
-        line = "1"
-        self.assertEqual(pattern.replace_line(line), "")
+        self.assertEqual(pattern.replace_line("1234"), "34")
+        self.assertEqual(pattern.replace_line("1"), "")
 
     def test_setup_content(self):
         """Test in-line setup for capturing content."""
-        pattern = semiliterate.ExtractionPattern()
-
+        pattern = ExtractionPattern()
         # Set content
         pattern.setup("//md content='(hello)'")
-        line = "hello world"
-        self.assertEqual(pattern.replace_line(line), "hello")
+        self.assertEqual(pattern.replace_line("hello world"), "hello")
 
     def test_setup_stop(self):
         """Test in-line setup for stopping capture."""
-        pattern = semiliterate.ExtractionPattern()
-        stop_pattern = re.compile(".*(world)")
-        self.assertNotEqual(stop_pattern, pattern.stop)
+        pattern = ExtractionPattern()
         # Set stop
         pattern.setup("//md stop='.*(world)'")
+        stop_pattern = re.compile(".*(world)")
         self.assertEqual(stop_pattern, pattern.stop)
 
     def test_block_comment(self):
         """Test a nominal block start/replace/end pattern."""
-        pattern = semiliterate.ExtractionPattern(
+        pattern = ExtractionPattern(
             start=r'^\s*\/\*+\W?md\b',
             stop=r'^\s*\*\*/\s*$')
 
@@ -85,7 +110,7 @@ def test_block_comment(self):
 
     def test_line_comment(self):
         """Test replacing characters from a line."""
-        pattern = semiliterate.ExtractionPattern(
+        pattern = ExtractionPattern(
             start=r'^\s*\/\/+\W?md\b',
             stop=r'^\s*\/\/\send\smd\s*$',
             replace=[r'^\s*\/\/\s?(.*\n?)$', r'^.*$'])
@@ -110,135 +135,442 @@ def test_line_comment(self):
             pattern.stop.match("  // end md").string, "  // end md")
 
 
-class TestLazyFile(unittest.TestCase):
+class TestLazyFile(FakeFsTestCase):
     """Test LazyFile interface."""
 
+    def setUp(self):
+        """Set up fake filesystem."""
+        self.setUpPyfakefs()
+        self.directory = "/tmp/test_semiliterate/TestLazyFile"
+        self.file = "test_init"
+        self.full_path = os.path.join(self.directory, self.file)
+        self.fs.create_file(self.full_path)
+
     def test_write(self):
         """Test writing to lazy file."""
-        directory = "/tmp/test_semiliterate/TestLazyFile"
-        file = "test_init"
-        lazy_file = semiliterate.LazyFile(directory=directory, name=file)
-        full_path = os.path.join(directory, file)
-        self.assertEqual(str(lazy_file), full_path)
-        mock = mock_open()
-        with patch('mkdocs_simple_plugin.semiliterate.open',
-                   mock,
-                   create=True) as patched:
-            self.assertIs(patched, mock)
-            lazy_file.write('test line')
-            lazy_file.write('second_line')
-            lazy_file.close()
-
-        mock.assert_called_once_with(full_path, 'w+')
-        self.assertEqual(mock.return_value.write.call_count, 2)
-        mock.return_value.close.assert_called_once()
-
+        lazy_file = LazyFile(directory=self.directory, name=self.file)
+        lazy_file.write('test line')
+        lazy_file.write('second_line')
+        output = lazy_file.close()
+        self.assertEqual(output, self.full_path)
+        self.assertContentsEqual(self.full_path, ['test line', 'second_line'])
+
+    def test_write_none(self):
+        """Test that writing none results in none."""
+        lazy_file = LazyFile(directory=self.directory, name=self.file)
+        lazy_file.write(None)
+        output = lazy_file.close()
+        self.assertIsNone(output)
+
+    def test_write_empty(self):
+        """Test that writing an empty string results in empty line."""
+        lazy_file = LazyFile(directory=self.directory, name=self.file)
+        lazy_file.write('')
+        output = lazy_file.close()
+        self.assertEqual(output, self.full_path)
+        self.assertContentsEqual(self.full_path, [''])
+
+    def test_same_path_should_be_equal(self):
+        """Test that two LazyFiles are equal if they have the same path."""
+        lazy_file = LazyFile(directory=self.directory, name=self.file)
         self.assertEqual(
             lazy_file,
-            semiliterate.LazyFile(directory=directory, name=file))
+            LazyFile(directory=self.directory, name=self.file))
 
 
-class TestStreamExtract(unittest.TestCase):
+class TestStreamExtract(FakeFsTestCase):
     """Test extracting data to a stream."""
 
     def setUp(self):
         """Set up the mock for input, output, and stream."""
-        self.input_mock = MagicMock()
-        self.output_mock = MagicMock()
-        self.test_stream = semiliterate.StreamExtract(
-            input_stream=self.input_mock, output_stream=self.output_mock)
-
-    def test_transcribe(self):
-        """Transcribing data should write data."""
-        self.assertFalse(self.test_stream.wrote_something)
-
-        self.test_stream.transcribe("test input")
-        self.output_mock.write.assert_called_once_with("test input")
-        self.assertTrue(self.test_stream.wrote_something)
-
-    def test_transcribe_none(self):
-        """Transcribing nothing should do nothing."""
-        self.assertFalse(self.test_stream.wrote_something)
-
-        self.test_stream.transcribe("")
-        self.assertFalse(self.test_stream.wrote_something)
-
-    def test_extract_match(self):
-        """Test extracting from a regex match."""
-        self.assertFalse(self.test_stream.wrote_something)
+        self.setUpPyfakefs()
+        self.input_stream = MagicMock(spec=TextIOWrapper)
+        self.output_dir = "test"
+        self.output_filename = "output.md"
+        self.output_path = os.path.join(self.output_dir, self.output_filename)
+        self.output_stream = LazyFile(
+            directory=self.output_dir,
+            name=self.output_filename)
+        self.output_stream.file_name = "output.md"
+        self.stream_extract = StreamExtract(
+            input_stream=self.input_stream, output_stream=self.output_stream)
+
+    def test_set_output_file_with_filename(self):
+        """Should set output file by filename."""
+        filename = "test_output.md"
+        file = self.stream_extract.set_output_file(filename)
+        # It returns the stream
+        self.assertEqual(file, self.stream_extract.output_stream)
+        # It is the right type
+        self.assertIsInstance(file, LazyFile)
+        # It saved the filename
+        self.assertEqual(file.file_name, filename)
+
+    def test_set_output_file_with_empty_filename(self):
+        """Empty filename should not change output stream."""
+        original = self.stream_extract.output_stream
+        self.stream_extract.set_output_file("")
+        self.assertEqual(
+            self.stream_extract.output_stream, original)
 
-        self.test_stream.try_extract_match(None)
-        self.assertFalse(self.test_stream.wrote_something)
+    def test_set_output_stream(self):
+        """Should set output stream from a new LazyFile."""
+        new_output_stream = LazyFile(directory="new", name="test.md")
+        self.stream_extract.set_output_stream(new_output_stream)
+        self.assertEqual(self.stream_extract.output_stream, new_output_stream)
 
-        mock_value = ("test first", "test second")
+    def test_close_multiple_files(self):
+        """Setting the filename to a new file should create a new stream."""
+        self.output_stream.write("test output 1")
 
-        def index_func(self, value):
-            return mock_value[value]
-        mock_match = MagicMock(return_value=mock_value)
-        mock_match.__getitem__ = index_func
-        mock_match.lastindex.return_value = 1
-        self.test_stream.try_extract_match(mock_match)
-        self.output_mock.write.assert_called_once_with("test second\n")
-        self.assertTrue(self.test_stream.wrote_something)
+        test_filename = "new_name.snippet"
+        test_path = os.path.join(self.output_dir, test_filename)
+        new_stream = self.stream_extract.set_output_file(test_filename)
+        new_stream.write("test output 2")
 
-    def test_set_output_stream_new(self):
-        """Setting the filename to a new file should create a new stream."""
-        self.output_mock.file_name = "test_name"
-        self.output_mock.file_directory = "/test/dir/"
+        files = self.stream_extract.close()
 
-        self.test_stream.set_output_file("new_name.snippet")
-        self.output_mock.close.assert_called_once()
         self.assertEqual(
-            self.test_stream.output_stream.file_name, "new_name.snippet")
+            self.stream_extract.output_stream.file_name, test_filename)
+        self.assertEqual(len(files), 2)
+        self.assertIn(self.output_path, files, str(files))
+        self.assertIn(test_path, files, str(files))
 
-    def test_set_output_stream_same(self):
+    def test_close_same_file(self):
         """Setting the output stream to the same file should do nothing."""
-        self.output_mock.file_name = "test_name"
-        self.output_mock.file_directory = "/test/dir/"
-        self.test_stream.set_output_file("test_name")
-        self.output_mock.close.assert_not_called()
+        self.output_stream.write("test output 1")
 
+        # Try setting same file
+        new_stream = self.stream_extract.set_output_file(self.output_filename)
+        new_stream.write("test output 2")
 
-class TestSemiliterate(unittest.TestCase):
+        files = self.stream_extract.close()
+
+        self.assertEqual(
+            self.stream_extract.output_stream.file_name, self.output_filename)
+        self.assertEqual(len(files), 1)
+
+    def test_extract(self):
+        """Test extraction"""
+        self.stream_extract.patterns = [
+            ExtractionPattern(start=r'START', stop=r'STOP')
+        ]
+        lines = ['Line 1', 'START', 'Extracted Text', 'STOP', 'Line 2']
+
+        # Set up mock behavior for input_stream
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+
+        # Perform the extraction
+        output_files = self.stream_extract.extract()
+
+        # Assertions
+        self.assertEqual(len(output_files), 1)
+        # extracted text between start and stop
+        self.assertContentsEqual(self.output_path, ['Extracted Text'])
+
+    def test_extract_no_patterns(self):
+        """Extraction without patterns should extract the whole file."""
+        lines = ['Line 1', 'Line 2', 'Line 3']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # each line extracted
+        self.assertContentsEqual(self.output_path, lines)
+
+    def test_extract_pattern_without_start(self):
+        """Extraction without the start attribute should extract until stop."""
+        self.stream_extract.patterns = [ExtractionPattern(stop=r'STOP')]
+        lines = ['Line 1', 'START', 'STOP', 'Line 2']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # lines extracted until stop
+        self.assertContentsEqual(self.output_path, ['Line 1', 'START'])
+
+    def test_extract_multiple_patterns(self):
+        """Test extraction with multiple patterns."""
+        self.stream_extract.patterns = [
+            ExtractionPattern(start=r'START1', stop=r'STOP1'),
+            ExtractionPattern(start=r'START2', stop=r'STOP2')
+        ]
+        lines = [
+            'Line 1',
+            'START1',
+            'Content 1',
+            'STOP1',
+            'START2',
+            'Content 2',
+            'STOP2',
+            'Line 2']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # lines between start and stop extracted
+        self.assertContentsEqual(self.output_path, ['Content 1', 'Content 2'])
+
+    def test_extract_empty_lines(self):
+        """Test extraction with empty lines in the input."""
+        self.stream_extract.patterns = [
+            ExtractionPattern(
+                start=r'START',
+                stop=r'STOP')]
+        lines = ['Line 1', '', 'START', '', 'Content', 'STOP', 'Line 2']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # empty line extracted
+        self.assertContentsEqual(self.output_path, ['', 'Content'])
+
+    def test_extract_pattern_at_stop(self):
+        """Test extraction with a capture group in the stop pattern."""
+        self.stream_extract.patterns = [
+            ExtractionPattern(
+                start=r'START',
+                stop=r'STOP(.*)$')]
+        lines = ['Line 1', 'START', 'Content', 'STOP:Capture end']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # Lines and stop pattern extracted
+        self.assertContentsEqual(self.output_path, ['Content', ':Capture end'])
+
+    def test_extract_pattern_at_start(self):
+        """Test extraction with a capture group in the start pattern."""
+        self.stream_extract.patterns = [
+            ExtractionPattern(
+                start=r'START(.*)$',
+                stop=r'STOP')]
+        lines = ['Line 1', 'START:Capture start', 'Content', 'STOP:Capture end']
+        input_stream_iterator = iter(lines)
+        self.input_stream.__iter__.return_value = input_stream_iterator
+        output_files = self.stream_extract.extract()
+
+        # one file extracted
+        self.assertEqual(len(output_files), 1)
+        # Start pattern extracted plus content
+        self.assertContentsEqual(
+            self.output_path,
+            [':Capture start', 'Content'])
+
+
+class TestSemiliterate(FakeFsTestCase):
     """Test the Semiliterate base class."""
 
-    @patch('mkdocs_simple_plugin.semiliterate.StreamExtract')
-    def test_try_extraction_default(self, mock_stream_extract):
-        """Test extraction."""
-        test_semiliterate = semiliterate.Semiliterate(
-            pattern=r".*")
-        mock_stream_extract.extract.return_value = True
-        mock = mock_open()
-        with patch('mkdocs_simple_plugin.semiliterate.open',
-                   mock,
-                   create=True) as patched:
-            self.assertIs(patched, mock)
-            self.assertTrue(test_semiliterate.try_extraction(
-                from_directory="/test/dir",
-                from_file="test_file.md",
-                destination_directory="/out/dir"))
-        mock.assert_called_once_with("/test/dir/test_file.md")
-        assert mock_stream_extract is semiliterate.StreamExtract
-        mock_stream_extract.called_once()
-
-    @patch('mkdocs_simple_plugin.semiliterate.StreamExtract')
-    def test_try_extraction_skip(self, mock_stream_extract):
-        """Test skipping extraction for name mismatch with pattern filter"""
-        test_semiliterate = semiliterate.Semiliterate(
-            pattern=r".py")
-        mock_stream_extract.extract.return_value = True
-        mock = mock_open()
-        with patch('mkdocs_simple_plugin.semiliterate.open',
-                   mock,
-                   create=True) as patched:
-            self.assertIs(patched, mock)
-            self.assertFalse(test_semiliterate.try_extraction(
-                from_directory="/test/dir",
-                from_file="test_file.md",
-                destination_directory="/out/dir"))
-        mock.assert_not_called()
-        assert mock_stream_extract is semiliterate.StreamExtract
-        mock_stream_extract.assert_not_called()
+    def setUp(self):
+        """Set up fake filesystem."""
+        self.setUpPyfakefs()
+
+    def test_filename_match_with_match(self):
+        """Filename match with txt should match example.txt."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        match = test_semiliterate.filename_match('example.txt')
+        self.assertEqual(match, 'example.md')
+
+    def test_filename_match_without_match(self):
+        """Filename match with txt should not match example.jpg."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        match = test_semiliterate.filename_match('example.jpg')
+        self.assertIsNone(match)
+
+    def test_filename_match_with_destination(self):
+        """Test filename matching with a destination pattern."""
+        test_semiliterate = Semiliterate(pattern=r'(.*)\.txt')
+        test_semiliterate.destination = r'\1_output.md'
+        name = "example.txt"
+        result = test_semiliterate.filename_match(name)
+        self.assertEqual(result, "example_output.md")
+
+    def test_try_extraction_successful(self):
+        """Test extraction of a txt file to an md file should succeed."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        directory = "/source"
+        filename = "example.txt"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/example.md"
+        self.fs.create_file(path, contents="Sample content")
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
+
+    def test_try_extraction_no_match(self):
+        """Test extraction of a non-matching file to an md file should fail."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        directory = "/source"
+        filename = "example.py"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        self.fs.create_file(path, contents="Sample content")
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertFalse(result)  # Extraction failed
+        self.assertListEqual(result, [])
+
+    def test_try_extraction_io_error(self):
+        """Test extraction of a nonexistent file should fail."""
+        semiliterate = Semiliterate(pattern=r'.*\.txt')
+        result = semiliterate.try_extraction(
+            from_directory='/source',
+            from_file='nonexistent.txt',
+            destination_directory='/output'
+        )
+        self.assertFalse(result)  # Extraction failed
+        self.assertListEqual(result, [])
+
+    def test_unicode_filenames_and_content(self):
+        """Test behavior with non-ASCII filenames and content."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        directory = "/source"
+        filename = "mön.txt"  # Non-ASCII filename
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/mön.md"  # Expected output filename
+
+        # Create and write content with non-ASCII characters
+        content = 'Non-ASCII content: mön\n'
+        self.fs.create_file(path, contents=content)
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
+
+    def test_large_input_file(self):
+        """Test behavior with a large input file."""
+        test_semiliterate = Semiliterate(pattern=r'.*\.txt')
+        directory = "/source"
+        filename = "large.txt"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/large.md"  # Expected output filename
+
+        # Create a large content string (simulate a large file)
+        large_content = 'A' * 10_000_000  # 10 MB content
+        self.fs.create_file(path, contents=large_content)
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
+
+    def test_extract_with_custom_termination_pattern(self):
+        """Test extraction with a custom termination pattern."""
+        test_semiliterate = Semiliterate(
+            pattern=r'.*\.txt',
+            terminate=r'^END'  # Custom termination pattern
+        )
+        directory = "/source"
+        filename = "custom_termination.txt"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/custom_termination.md"
+
+        # Create content with the custom termination pattern
+        content = 'Content before END\nEND\nContent after END'
+        self.fs.create_file(path, contents=content)
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
+
+    def test_extract_with_multiple_extraction_patterns(self):
+        """Test extraction with multiple extraction patterns."""
+        test_semiliterate = Semiliterate(
+            pattern=r'.*\.txt',
+            extract=[
+                {
+                    "start": r'^START',
+                    "stop": r'^STOP'
+                },
+                {
+                    "start": r'^BEGIN',
+                    "stop": r'^END'
+                }
+            ]
+        )
+        directory = "/source"
+        filename = "multiple_patterns.txt"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/multiple_patterns.md"
+
+        # Create content with multiple extraction patterns
+        content = 'Content before START\nSTART\nExtracted Text 1\nSTOP\n' \
+            'Content between BEGIN and END\nBEGIN\nExtracted Text 2\nEND'
+        self.fs.create_file(path, contents=content)
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
+
+    def test_extract_with_destination_template(self):
+        """Test extraction with a destination template."""
+        test_semiliterate = Semiliterate(
+            pattern=r'(.*)\.txt',
+            destination=r'\1_output.md'  # Destination template
+        )
+        directory = "/source"
+        filename = "template.txt"
+        path = os.path.join(directory, filename)
+        output = "/output"
+        expected_output_path = "/output/template_output.md"
+
+        # Create content
+        content = 'Sample content'
+        self.fs.create_file(path, contents=content)
+
+        result = test_semiliterate.try_extraction(
+            from_directory=directory,
+            from_file=filename,
+            destination_directory=output
+        )
+        self.assertTrue(result)  # Extraction successful
+        self.assertListEqual(result, [expected_output_path])
+        self.assertTrue(self.fs.exists(expected_output_path))
 
 
 if __name__ == '__main__':