From 18bfd8679302828258810bb215cf6401dfca7ea1 Mon Sep 17 00:00:00 2001 From: Nasty Date: Mon, 25 Sep 2023 15:12:31 +0300 Subject: [PATCH] some fixes --- .../_static/code_examples/dedoc_add_new_doc_type_tutorial.py | 2 +- docs/source/tutorials/add_new_doc_type.rst | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/_static/code_examples/dedoc_add_new_doc_type_tutorial.py b/docs/source/_static/code_examples/dedoc_add_new_doc_type_tutorial.py index 403667ba..b5d9a329 100644 --- a/docs/source/_static/code_examples/dedoc_add_new_doc_type_tutorial.py +++ b/docs/source/_static/code_examples/dedoc_add_new_doc_type_tutorial.py @@ -24,7 +24,7 @@ file_mime = mimetypes.guess_type(file_path)[0] djvu_converter.can_convert(file_extension, file_mime) # True -djvu_converter.do_convert(file_dir, name_wo_extension, file_extension) # 'example_with_table7.pdf' +djvu_converter.do_convert(file_dir, name_wo_extension, file_extension) # 'example_with_table.pdf' file_dir, file_name = "test_dir", "example_with_attachments_depth_1.pdf" file_path = os.path.join(file_dir, file_name) diff --git a/docs/source/tutorials/add_new_doc_type.rst b/docs/source/tutorials/add_new_doc_type.rst index 07fe7b5c..61417e38 100644 --- a/docs/source/tutorials/add_new_doc_type.rst +++ b/docs/source/tutorials/add_new_doc_type.rst @@ -101,6 +101,11 @@ General scheme of adding AttachmentExtractor def __init__(self) -> None: self.attachment_extractor = PdfAttachmentsExtractor() + def read(self, path: str, document_type: Optional[str] = None, parameters: Optional[dict] = None) -> UnstructuredDocument: + # some code + attachments = self.attachment_extractor.get_attachments(tmpdir, filename, parameters) + # some code + Example of adding pdf/djvu handlers -----------------------------------