diff --git a/mkdocs_simple_plugin/semiliterate.py b/mkdocs_simple_plugin/semiliterate.py index c3a9048e..aa915e18 100644 --- a/mkdocs_simple_plugin/semiliterate.py +++ b/mkdocs_simple_plugin/semiliterate.py @@ -1,4 +1,5 @@ """Semiliterate module handles document extraction from source files.""" +from io import TextIOWrapper import os import re @@ -7,14 +8,7 @@ from mkdocs import utils -def get_line(line: str) -> str: - """Returns line with EOL.""" - if not line: - return None - return line if line.endswith("\n") else line + '\n' - - -def get_match(pattern: re.Pattern, line: str) -> re.Match: +def _get_match(pattern: re.Pattern, line: str) -> re.Match: """Returns the match for the given pattern.""" if not pattern: return None @@ -149,15 +143,15 @@ def setup(self, line: str) -> None: """Process input parameters.""" setup_inline = InlineParams() - file_match = get_match(setup_inline.filename_pattern, line) + file_match = _get_match(setup_inline.filename_pattern, line) if file_match and file_match.lastindex: setup_inline.filename = file_match[file_match.lastindex] - trim_match = get_match(setup_inline.trim_pattern, line) + trim_match = _get_match(setup_inline.trim_pattern, line) if trim_match and trim_match.lastindex: setup_inline.trim = int(trim_match[trim_match.lastindex]) - content_match = get_match(setup_inline.content_pattern, line) + content_match = _get_match(setup_inline.content_pattern, line) if content_match and content_match.lastindex: regex_pattern = content_match[content_match.lastindex] setup_inline.content = re.compile(regex_pattern) @@ -167,7 +161,7 @@ def setup(self, line: str) -> None: # 1. default from extraction pattern settings # 2. default from inline params self.stop = self._stop_default - stop_match = get_match(setup_inline.stop_pattern, line) + stop_match = _get_match(setup_inline.stop_pattern, line) if stop_match and stop_match.lastindex: regex_pattern = stop_match[stop_match.lastindex] self.stop = re.compile(regex_pattern) @@ -185,7 +179,7 @@ def replace_line(self, line: str) -> str: line = line[self.inline.trim:] # Process inline content regex if self.inline.content: - match_object = get_match(self.inline.content, line) + match_object = _get_match(self.inline.content, line) if match_object.lastindex: return match_object[match_object.lastindex] # Preform replace operations @@ -227,126 +221,132 @@ def __str__(self) -> str: return os.path.join(self.file_directory, self.file_name) def write(self, arg: str) -> None: - """Create and write the file, only if not empty.""" - if not arg: + """Create and write a string line to the file, iff not none.""" + if arg is None: return if self.file_object is None: filename = os.path.join(self.file_directory, self.file_name) os.makedirs(self.file_directory, exist_ok=True) self.file_object = open(filename, 'w+') - self.file_object.write(arg) + + def get_line(line: str) -> str: + """Returns line with EOL.""" + return line if line.endswith("\n") else line + '\n' + + self.file_object.write(get_line(arg)) def close(self) -> str: """Finish the file.""" if self.file_object is not None: - file = os.path.join(self.file_directory, self.file_name) - utils.log.debug(" ... extracted %s", file) + file_path = os.path.join(self.file_directory, self.file_name) + utils.log.debug(" ... extracted %s", file_path) self.file_object.close() self.file_object = None - return file + return file_path return None class StreamExtract: - """Extract documentation portions of files to an output stream.""" + """Extract files to an output stream. + + Optionally filter using a list of ExtractionPatterns. + """ def __init__( self, - input_stream: LazyFile, + input_stream: TextIOWrapper, output_stream: LazyFile, terminate: re.Pattern = None, patterns: ExtractionPattern = None, **kwargs): """Initialize StreamExtract with input and output streams.""" self.input_stream = input_stream - self.default_stream = output_stream self.output_stream = output_stream self.terminate = terminate self.patterns = patterns - self.wrote_something = False - self.output_files = [] - self.streams = { + + self._default_stream = output_stream + self._output_files = [] + self._streams = { output_stream.file_name: output_stream } - def transcribe(self, text: str) -> None: - """Write some text and record if something was written.""" - self.output_stream.write(text) - if text: - self.wrote_something = True - - def try_extract_match( + def _try_extract_match( self, match_object: re.Match, emit_last: bool = True) -> bool: - """Extract match into output. + """Extracts line iff there's a match. - If _match_object_ is not false-y, returns true. - If extract flag is true, emits the last group of the match if any. + Returns: + True iff match_object exists. """ if not match_object: return False if match_object.lastindex and emit_last: - self.transcribe(get_line(match_object[match_object.lastindex])) + self.output_stream.write(match_object[match_object.lastindex]) return True def close(self) -> list: - """Returns true if something was written""" + """Close the file and return a list of filenames written to.""" file = self.output_stream.close() - if file and self.wrote_something: - self.output_files.append(file) - return self.output_files + if file: + self._output_files.append(file) + return self._output_files - def set_output_file(self, filename: str) -> None: - """Set output stream from filename.""" + def set_output_file(self, filename: str) -> LazyFile: + """Set the current output stream from filename and return the stream.""" output_stream = self.output_stream if filename: # If we've opened this file before, re-use its stream. - if filename in self.streams: - return self.set_output_stream(self.streams[filename]) + if filename in self._streams: + return self.set_output_stream(self._streams[filename]) # Otherwise, make a new one and save it to the list. output_stream = LazyFile( self.output_stream.file_directory, filename) - self.streams[filename] = output_stream - self.set_output_stream(output_stream) + self._streams[filename] = output_stream + return self.set_output_stream(output_stream) - def set_output_stream(self, stream: LazyFile) -> None: - """Set the output stream.""" + def set_output_stream(self, stream: LazyFile) -> LazyFile: + """Set the current output stream and return the stream.""" if self.output_stream != stream: self.close() self.output_stream = stream + return self.output_stream def extract(self, **kwargs) -> list: """Extract from file with semiliterate configuration. - Invoke this method to perform the extraction. Returns true if - any text is actually extracted, false otherwise. + Invoke this method to perform the extraction. + + Returns: + A list of files extracted. """ active_pattern = None if self.patterns else ExtractionPattern() - for pattern in self.patterns: + patterns = self.patterns if self.patterns else [] + for pattern in patterns: if not pattern.start: active_pattern = pattern for line in self.input_stream: # Check terminate, regardless of state: - if self.try_extract_match( - get_match(self.terminate, line), active_pattern): + if self._try_extract_match( + _get_match(self.terminate, line), active_pattern): return self.close() # Change state if flagged to do so: if active_pattern is None: - for pattern in self.patterns: - start = get_match(pattern.start, line) + for pattern in patterns: + start = _get_match(pattern.start, line) if start: active_pattern = pattern active_pattern.setup(line) self.set_output_file(active_pattern.get_filename()) - self.try_extract_match(start) + self._try_extract_match(start) break continue # We are extracting. See if we should stop: - if self.try_extract_match(get_match(active_pattern.stop, line)): + if self._try_extract_match(_get_match(active_pattern.stop, line)): active_pattern = None - self.set_output_stream(self.default_stream) + self.set_output_stream(self._default_stream) continue # Extract all other lines in the normal way: self.extract_line(line, active_pattern) @@ -354,8 +354,8 @@ def extract(self, **kwargs) -> list: def extract_line(self, line: str, extraction_pattern: re.Pattern) -> None: """Copy line to the output stream, applying specified replacements.""" - line = get_line(extraction_pattern.replace_line(line)) - self.transcribe(line) + line = extraction_pattern.replace_line(line) + self.output_stream.write(line) class Semiliterate: @@ -410,18 +410,25 @@ def __init__( self.file_filter = re.compile(pattern) self.destination = destination self.terminate = (terminate is not None) and re.compile(terminate) - self.patterns = [] + self.extractions = [] if not extract: extract = [] if isinstance(extract, dict): # if there is only one extraction pattern, allow it to be a single # dict entry extract = [extract] - for pattern in extract: - self.patterns.append(ExtractionPattern(**pattern)) + for extract_params in extract: + self.extractions.append(ExtractionPattern(**extract_params)) def filename_match(self, name: str) -> str: - """Get the filename for the match, otherwise return None.""" + """Get the filename for the match, otherwise return None. + + Args: + name (str): The name to match with the pattern filter + + Returns: + The output filename for 'name' or None + """ name_match = self.file_filter.search(name) if name_match: new_name = os.path.splitext(name)[0] + '.md' @@ -438,7 +445,12 @@ def try_extraction( **kwargs) -> list: """Try to extract documentation from file with name. - Returns True if extraction was successful. + Args: + from_directory (str): The source directory + from_file (str): The source filename within directory + destination_directory (str): The destination directory + + Returns a list of extracted files. """ to_file = self.filename_match(from_file) if not to_file: @@ -452,11 +464,11 @@ def try_extraction( input_stream=original_file, output_stream=LazyFile(destination_directory, to_file), terminate=self.terminate, - patterns=self.patterns, + patterns=self.extractions, **kwargs) return extraction.extract() except (UnicodeDecodeError) as error: - utils.log.info("mkdocs-simple-plugin: Skipped %s", from_file_path) + utils.log.debug("mkdocs-simple-plugin: Skipped %s", from_file_path) utils.log.debug( "mkdocs-simple-plugin: Error details: %s", str(error)) except (OSError, IOError) as error: diff --git a/tests/test_semiliterate.py b/tests/test_semiliterate.py index d1adc0b1..16fc09f4 100755 --- a/tests/test_semiliterate.py +++ b/tests/test_semiliterate.py @@ -1,11 +1,43 @@ #!/usr/bin/env python """Test mkdocs_simple_plugin.semiliterate""" import unittest -from unittest.mock import patch, mock_open, MagicMock +from unittest.mock import MagicMock import os import re +from io import TextIOWrapper -from mkdocs_simple_plugin import semiliterate +from pyfakefs import fake_filesystem_unittest + +from mkdocs_simple_plugin.semiliterate import ( + ExtractionPattern, + LazyFile, + Semiliterate, + StreamExtract, +) + + +class FakeFsTestCase(fake_filesystem_unittest.TestCase): + """Custom common helper test functions.""" + + # pylint: disable=invalid-name + def assertContentsIn(self, path, contents): + """Assert that a file path contains contents""" + # Read the content of the fake file + with open(path, "r") as file: + file_content = file.read() + + # Assert that the file contains a specific string + self.assertIn(contents, file_content) + + # pylint: disable=invalid-name + def assertContentsEqual(self, path, expected_contents): + """Assert that the file matches the expected contents""" + with open(path, "r") as file: + file_contents = file.read().splitlines() + self.assertEqual( + file_contents, + expected_contents, + f"File at {path} does not contain the expected contents.") class TestExtractionPattern(unittest.TestCase): @@ -13,7 +45,7 @@ class TestExtractionPattern(unittest.TestCase): def test_default(self): """Test the default configuration without any additional options.""" - pattern = semiliterate.ExtractionPattern() + pattern = ExtractionPattern() # replace_line should just return the line self.assertEqual(pattern.replace_line("/** md "), "/** md ") self.assertEqual(pattern.replace_line("## Hello"), "## Hello") @@ -27,44 +59,37 @@ def test_default(self): def test_setup_filename(self): """Test in-line setup for filename.""" - pattern = semiliterate.ExtractionPattern() - + pattern = ExtractionPattern() # Set filename pattern.setup("//md file=new_name.snippet") self.assertEqual(pattern.get_filename(), "new_name.snippet") def test_setup_trim(self): """Test in-line setup for trimming front.""" - pattern = semiliterate.ExtractionPattern() - + pattern = ExtractionPattern() # Set trim pattern.setup("//md trim=2") - line = "1234" - self.assertEqual(pattern.replace_line(line), "34") - line = "1" - self.assertEqual(pattern.replace_line(line), "") + self.assertEqual(pattern.replace_line("1234"), "34") + self.assertEqual(pattern.replace_line("1"), "") def test_setup_content(self): """Test in-line setup for capturing content.""" - pattern = semiliterate.ExtractionPattern() - + pattern = ExtractionPattern() # Set content pattern.setup("//md content='(hello)'") - line = "hello world" - self.assertEqual(pattern.replace_line(line), "hello") + self.assertEqual(pattern.replace_line("hello world"), "hello") def test_setup_stop(self): """Test in-line setup for stopping capture.""" - pattern = semiliterate.ExtractionPattern() - stop_pattern = re.compile(".*(world)") - self.assertNotEqual(stop_pattern, pattern.stop) + pattern = ExtractionPattern() # Set stop pattern.setup("//md stop='.*(world)'") + stop_pattern = re.compile(".*(world)") self.assertEqual(stop_pattern, pattern.stop) def test_block_comment(self): """Test a nominal block start/replace/end pattern.""" - pattern = semiliterate.ExtractionPattern( + pattern = ExtractionPattern( start=r'^\s*\/\*+\W?md\b', stop=r'^\s*\*\*/\s*$') @@ -85,7 +110,7 @@ def test_block_comment(self): def test_line_comment(self): """Test replacing characters from a line.""" - pattern = semiliterate.ExtractionPattern( + pattern = ExtractionPattern( start=r'^\s*\/\/+\W?md\b', stop=r'^\s*\/\/\send\smd\s*$', replace=[r'^\s*\/\/\s?(.*\n?)$', r'^.*$']) @@ -110,135 +135,442 @@ def test_line_comment(self): pattern.stop.match(" // end md").string, " // end md") -class TestLazyFile(unittest.TestCase): +class TestLazyFile(FakeFsTestCase): """Test LazyFile interface.""" + def setUp(self): + """Set up fake filesystem.""" + self.setUpPyfakefs() + self.directory = "/tmp/test_semiliterate/TestLazyFile" + self.file = "test_init" + self.full_path = os.path.join(self.directory, self.file) + self.fs.create_file(self.full_path) + def test_write(self): """Test writing to lazy file.""" - directory = "/tmp/test_semiliterate/TestLazyFile" - file = "test_init" - lazy_file = semiliterate.LazyFile(directory=directory, name=file) - full_path = os.path.join(directory, file) - self.assertEqual(str(lazy_file), full_path) - mock = mock_open() - with patch('mkdocs_simple_plugin.semiliterate.open', - mock, - create=True) as patched: - self.assertIs(patched, mock) - lazy_file.write('test line') - lazy_file.write('second_line') - lazy_file.close() - - mock.assert_called_once_with(full_path, 'w+') - self.assertEqual(mock.return_value.write.call_count, 2) - mock.return_value.close.assert_called_once() - + lazy_file = LazyFile(directory=self.directory, name=self.file) + lazy_file.write('test line') + lazy_file.write('second_line') + output = lazy_file.close() + self.assertEqual(output, self.full_path) + self.assertContentsEqual(self.full_path, ['test line', 'second_line']) + + def test_write_none(self): + """Test that writing none results in none.""" + lazy_file = LazyFile(directory=self.directory, name=self.file) + lazy_file.write(None) + output = lazy_file.close() + self.assertIsNone(output) + + def test_write_empty(self): + """Test that writing an empty string results in empty line.""" + lazy_file = LazyFile(directory=self.directory, name=self.file) + lazy_file.write('') + output = lazy_file.close() + self.assertEqual(output, self.full_path) + self.assertContentsEqual(self.full_path, ['']) + + def test_same_path_should_be_equal(self): + """Test that two LazyFiles are equal if they have the same path.""" + lazy_file = LazyFile(directory=self.directory, name=self.file) self.assertEqual( lazy_file, - semiliterate.LazyFile(directory=directory, name=file)) + LazyFile(directory=self.directory, name=self.file)) -class TestStreamExtract(unittest.TestCase): +class TestStreamExtract(FakeFsTestCase): """Test extracting data to a stream.""" def setUp(self): """Set up the mock for input, output, and stream.""" - self.input_mock = MagicMock() - self.output_mock = MagicMock() - self.test_stream = semiliterate.StreamExtract( - input_stream=self.input_mock, output_stream=self.output_mock) - - def test_transcribe(self): - """Transcribing data should write data.""" - self.assertFalse(self.test_stream.wrote_something) - - self.test_stream.transcribe("test input") - self.output_mock.write.assert_called_once_with("test input") - self.assertTrue(self.test_stream.wrote_something) - - def test_transcribe_none(self): - """Transcribing nothing should do nothing.""" - self.assertFalse(self.test_stream.wrote_something) - - self.test_stream.transcribe("") - self.assertFalse(self.test_stream.wrote_something) - - def test_extract_match(self): - """Test extracting from a regex match.""" - self.assertFalse(self.test_stream.wrote_something) + self.setUpPyfakefs() + self.input_stream = MagicMock(spec=TextIOWrapper) + self.output_dir = "test" + self.output_filename = "output.md" + self.output_path = os.path.join(self.output_dir, self.output_filename) + self.output_stream = LazyFile( + directory=self.output_dir, + name=self.output_filename) + self.output_stream.file_name = "output.md" + self.stream_extract = StreamExtract( + input_stream=self.input_stream, output_stream=self.output_stream) + + def test_set_output_file_with_filename(self): + """Should set output file by filename.""" + filename = "test_output.md" + file = self.stream_extract.set_output_file(filename) + # It returns the stream + self.assertEqual(file, self.stream_extract.output_stream) + # It is the right type + self.assertIsInstance(file, LazyFile) + # It saved the filename + self.assertEqual(file.file_name, filename) + + def test_set_output_file_with_empty_filename(self): + """Empty file should not change output stream.""" + original = self.stream_extract.output_stream + self.stream_extract.set_output_file("") + self.assertEqual( + self.stream_extract.output_stream, original) - self.test_stream.try_extract_match(None) - self.assertFalse(self.test_stream.wrote_something) + def test_set_output_stream(self): + """Should set output stream from a new LazyFile.""" + new_output_stream = LazyFile(directory="new", name="test.md") + self.stream_extract.set_output_stream(new_output_stream) + self.assertEqual(self.stream_extract.output_stream, new_output_stream) - mock_value = ("test first", "test second") + def test_close_multiple_files(self): + """Setting the filename to a new file should create a new stream.""" + self.output_stream.write("test output 1") - def index_func(self, value): - return mock_value[value] - mock_match = MagicMock(return_value=mock_value) - mock_match.__getitem__ = index_func - mock_match.lastindex.return_value = 1 - self.test_stream.try_extract_match(mock_match) - self.output_mock.write.assert_called_once_with("test second\n") - self.assertTrue(self.test_stream.wrote_something) + test_filename = "new_name.snippet" + test_path = os.path.join(self.output_dir, test_filename) + new_stream = self.stream_extract.set_output_file(test_filename) + new_stream.write("test output 2") - def test_set_output_stream_new(self): - """Setting the filename to a new file should create a new stream.""" - self.output_mock.file_name = "test_name" - self.output_mock.file_directory = "/test/dir/" + files = self.stream_extract.close() - self.test_stream.set_output_file("new_name.snippet") - self.output_mock.close.assert_called_once() self.assertEqual( - self.test_stream.output_stream.file_name, "new_name.snippet") + self.stream_extract.output_stream.file_name, test_filename) + self.assertEqual(len(files), 2) + self.assertIn(self.output_path, files, str(files)) + self.assertIn(test_path, files, str(files)) - def test_set_output_stream_same(self): + def test_close_same_file(self): """Setting the output stream to the same file should do nothing.""" - self.output_mock.file_name = "test_name" - self.output_mock.file_directory = "/test/dir/" - self.test_stream.set_output_file("test_name") - self.output_mock.close.assert_not_called() + self.output_stream.write("test output 1") + # Try setting same file + new_stream = self.stream_extract.set_output_file(self.output_filename) + new_stream.write("test output 2") -class TestSemiliterate(unittest.TestCase): + files = self.stream_extract.close() + + self.assertEqual( + self.stream_extract.output_stream.file_name, self.output_filename) + self.assertEqual(len(files), 1) + + def test_extract(self): + """Test extraction""" + self.stream_extract.patterns = [ + ExtractionPattern(start=r'START', stop=r'STOP') + ] + lines = ['Line 1', 'START', 'Extracted Text', 'STOP', 'Line 2'] + + # Set up mock behavior for input_stream + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + + # Perform the extraction + output_files = self.stream_extract.extract() + + # Assertions + self.assertEqual(len(output_files), 1) + # extracted text between start and stop + self.assertContentsEqual(self.output_path, ['Extracted Text']) + + def test_extract_no_patterns(self): + """Extraction without patterns should extract the whole file.""" + lines = ['Line 1', 'Line 2', 'Line 3'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # each line extracted + self.assertContentsEqual(self.output_path, lines) + + def test_extract_pattern_without_start(self): + """Extraction without the start attribute should extract until stop.""" + self.stream_extract.patterns = [ExtractionPattern(stop=r'STOP')] + lines = ['Line 1', 'START', 'STOP', 'Line 2'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # lines extracted until stop + self.assertContentsEqual(self.output_path, ['Line 1', 'START']) + + def test_extract_multiple_patterns(self): + """Test extraction with multiple patterns.""" + self.stream_extract.patterns = [ + ExtractionPattern(start=r'START1', stop=r'STOP1'), + ExtractionPattern(start=r'START2', stop=r'STOP2') + ] + lines = [ + 'Line 1', + 'START1', + 'Content 1', + 'STOP1', + 'START2', + 'Content 2', + 'STOP2', + 'Line 2'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # lines between start and stop extracted + self.assertContentsEqual(self.output_path, ['Content 1', 'Content 2']) + + def test_extract_empty_lines(self): + """Test extraction with empty lines in the input.""" + self.stream_extract.patterns = [ + ExtractionPattern( + start=r'START', + stop=r'STOP')] + lines = ['Line 1', '', 'START', '', 'Content', 'STOP', 'Line 2'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # empty line extracted + self.assertContentsEqual(self.output_path, ['', 'Content']) + + def test_extract_pattern_at_stop(self): + """Test extraction with a pattern matching at the end of input.""" + self.stream_extract.patterns = [ + ExtractionPattern( + start=r'START', + stop=r'STOP(.*)$')] + lines = ['Line 1', 'START', 'Content', 'STOP:Capture end'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # Lines and stop pattern extracted + self.assertContentsEqual(self.output_path, ['Content', ':Capture end']) + + def test_extract_pattern_at_start(self): + """Test extraction with a pattern matching at the end of input.""" + self.stream_extract.patterns = [ + ExtractionPattern( + start=r'START(.*)$', + stop=r'STOP')] + lines = ['Line 1', 'START:Capture start', 'Content', 'STOP:Capture end'] + input_stream_iterator = iter(lines) + self.input_stream.__iter__.return_value = input_stream_iterator + output_files = self.stream_extract.extract() + + # one file extracted + self.assertEqual(len(output_files), 1) + # Start pattern extracted plus content + self.assertContentsEqual( + self.output_path, + [':Capture start', 'Content']) + + +class TestSemiliterate(FakeFsTestCase): """Test the Semiliterate base class.""" - @patch('mkdocs_simple_plugin.semiliterate.StreamExtract') - def test_try_extraction_default(self, mock_stream_extract): - """Test extraction.""" - test_semiliterate = semiliterate.Semiliterate( - pattern=r".*") - mock_stream_extract.extract.return_value = True - mock = mock_open() - with patch('mkdocs_simple_plugin.semiliterate.open', - mock, - create=True) as patched: - self.assertIs(patched, mock) - self.assertTrue(test_semiliterate.try_extraction( - from_directory="/test/dir", - from_file="test_file.md", - destination_directory="/out/dir")) - mock.assert_called_once_with("/test/dir/test_file.md") - assert mock_stream_extract is semiliterate.StreamExtract - mock_stream_extract.called_once() - - @patch('mkdocs_simple_plugin.semiliterate.StreamExtract') - def test_try_extraction_skip(self, mock_stream_extract): - """Test skipping extraction for name mismatch with pattern filter""" - test_semiliterate = semiliterate.Semiliterate( - pattern=r".py") - mock_stream_extract.extract.return_value = True - mock = mock_open() - with patch('mkdocs_simple_plugin.semiliterate.open', - mock, - create=True) as patched: - self.assertIs(patched, mock) - self.assertFalse(test_semiliterate.try_extraction( - from_directory="/test/dir", - from_file="test_file.md", - destination_directory="/out/dir")) - mock.assert_not_called() - assert mock_stream_extract is semiliterate.StreamExtract - mock_stream_extract.assert_not_called() + def setUp(self): + """Set up fake filesystem.""" + self.setUpPyfakefs() + + def test_filename_match_with_match(self): + """Filename match with txt should match example.txt.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + match = test_semiliterate.filename_match('example.txt') + self.assertEqual(match, 'example.md') + + def test_filename_match_without_match(self): + """Filename match with txt should not match example.jpg.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + match = test_semiliterate.filename_match('example.jpg') + self.assertIsNone(match) + + def test_filename_match_with_destination(self): + """Test filename matching with a destination pattern.""" + test_semiliterate = Semiliterate(pattern=r'(.*)\.txt') + test_semiliterate.destination = r'\1_output.md' + name = "example.txt" + result = test_semiliterate.filename_match(name) + self.assertEqual(result, "example_output.md") + + def test_try_extraction_successful(self): + """Test extraction of a txt file to an md file should succeed.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + directory = "/source" + filename = "example.txt" + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/example.md" + self.fs.create_file(path, contents="Sample content") + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction failed + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) + + def test_try_extraction_no_match(self): + """Test extraction of a non matching file to an md file should fail.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + directory = "/source" + filename = "example.py" + path = os.path.join(directory, filename) + output = "/output" + self.fs.create_file(path, contents="Sample content") + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertFalse(result) # Extraction failed + self.assertListEqual(result, []) + + def test_try_extraction_io_error(self): + """Test extraction of a non existent file should fail.""" + semiliterate = Semiliterate(pattern=r'.*\.txt') + result = semiliterate.try_extraction( + from_directory='/source', + from_file='nonexistent.txt', + destination_directory='/output' + ) + self.assertFalse(result) # Extraction failed + self.assertListEqual(result, []) + + def test_unicode_filenames_and_content(self): + """Test behavior with non-ASCII filenames and content.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + directory = "/source" + filename = "mön.txt" # Non-ASCII filename + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/mön.md" # Expected output filename + + # Create and write content with non-ASCII characters + content = 'Non-ASCII content: mön\n' + self.fs.create_file(path, contents=content) + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction successful + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) + + def test_large_input_file(self): + """Test behavior with a large input file.""" + test_semiliterate = Semiliterate(pattern=r'.*\.txt') + directory = "/source" + filename = "large.txt" + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/large.md" # Expected output filename + + # Create a large content string (simulate a large file) + large_content = 'A' * 10_000_000 # 10 MB content + self.fs.create_file(path, contents=large_content) + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction successful + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) + + def test_extract_with_custom_termination_pattern(self): + """Test extraction with a custom termination pattern.""" + test_semiliterate = Semiliterate( + pattern=r'.*\.txt', + terminate=r'^END' # Custom termination pattern + ) + directory = "/source" + filename = "custom_termination.txt" + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/custom_termination.md" + + # Create content with the custom termination pattern + content = 'Content before END\nEND\nContent after END' + self.fs.create_file(path, contents=content) + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction successful + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) + + def test_extract_with_multiple_extraction_patterns(self): + """Test extraction with multiple extraction patterns.""" + test_semiliterate = Semiliterate( + pattern=r'.*\.txt', + extract=[ + { + "start": r'^START', + "stop": r'^STOP' + }, + { + "start": r'^BEGIN', + "stop": r'^END' + } + ] + ) + directory = "/source" + filename = "multiple_patterns.txt" + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/multiple_patterns.md" + + # Create content with multiple extraction patterns + content = 'Content before START\nSTART\nExtracted Text 1\nSTOP\n' \ + 'Content between BEGIN and END\nBEGIN\nExtracted Text 2\nEND' + self.fs.create_file(path, contents=content) + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction successful + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) + + def test_extract_with_destination_template(self): + """Test extraction with a destination template.""" + test_semiliterate = Semiliterate( + pattern=r'(.*)\.txt', + destination=r'\1_output.md' # Destination template + ) + directory = "/source" + filename = "template.txt" + path = os.path.join(directory, filename) + output = "/output" + expected_output_path = "/output/template_output.md" + + # Create content + content = 'Sample content' + self.fs.create_file(path, contents=content) + + result = test_semiliterate.try_extraction( + from_directory=directory, + from_file=filename, + destination_directory=output + ) + self.assertTrue(result) # Extraction successful + self.assertListEqual(result, [expected_output_path]) + self.assertTrue(self.fs.exists(expected_output_path)) if __name__ == '__main__':