From 38259765fb49c58ab5166c019d337bf0addd23ea Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Fri, 14 May 2021 18:10:28 +0200
Subject: [PATCH 01/10] Unable to find an XML parser allowing full recovery of
 XML, so this module should do the trick even though not the most efficient way.

---
 evernote_to_sqlite/hugexmlparser.py | 226 ++++++++++++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 evernote_to_sqlite/hugexmlparser.py

diff --git a/evernote_to_sqlite/hugexmlparser.py b/evernote_to_sqlite/hugexmlparser.py
new file mode 100644
index 0000000..bdc7654
--- /dev/null
+++ b/evernote_to_sqlite/hugexmlparser.py
@@ -0,0 +1,226 @@
+import re
+from typing import Union, List, BinaryIO, Annotated, Tuple
+
+
+def read_recovery_file(fn="/tmp/records.pickle") -> set:
+    try:
+        with open(fn, "rb") as f:
+            records = pickle.load(f)
+    except (FileNotFoundError, EOFError):
+        records = set()
+    return records
+
+
+def update_recovery_file(records, fn="/tmp/records.pickle"):
+    with open(fn, "wb") as f:
+        pickle.dump(records, f)
+
+
+class HugeXmlParser:
+    def __init__(self, filename: str, tag: str = "note", max_size_mb: int = 30):
+        """
+        Class for handling big malformed XML files
+        Args:
+            filename: Input file
+            tag:  The "root" tag you like to retrieve from the XML
+            max_size_mb: The maximum size allowed once discovering the tag before carrying on to next
+        """
+        self.exceed_max = 0
+        self.new_start = 0
+        self.filename = filename
+        self.tag = tag
+        self.max_size_mb = max_size_mb
+
+    @staticmethod
+    def split_and_strip(whole_chunk: Union[str, bytes], tag: str = "note") -> List:
+        """
+
+        Args:
+            whole_chunk: Input str or bytes
+            tag: The tag to split upon
+
+        Returns: List of chunk based on tag
+
+        """
+        if type(whole_chunk) is bytes:
+            whole_chunk = whole_chunk.decode()
+        chunks = re.split(fr"</?{tag}>", whole_chunk)
+        chunks = [_ for _ in chunks if _.strip()]
+        return chunks
+
+    def split_multiple_tag_chunk(
+        self, whole_chunk: Union[str, bytes], tag: str = "note"
+    ) -> str:
+        """
+        Split and yield tags from str or bytes
+        Args:
+            whole_chunk: Input str or bytes
+            tag: Tag to split out from whole_chunk
+
+        Returns: yields str
+
+        """
+        chunks = self.split_and_strip(whole_chunk, tag)
+        for chunk in chunks:
+            yield "".join([f"<{tag}>", chunk, f"</{tag}>"])
+
+    def escape_single_tag(self, whole_chunk, tag="content"):
+        chunks = self.split_and_strip(whole_chunk, tag)
+        if len(chunks) == 3:
+            return "".join(
+                [
+                    chunks[0],
+                    f"<{tag}>",
+                    "<![CDATA[",
+                    re.escape(chunks[1]),
+                    "]]>",
+                    f"</{tag}>",
+                    chunks[2],
+                ]
+            )
+
+    def tag_in_chunk(self, chunk: bytes) -> str:
+        """
+        Checks whether either start or end tag exists in tag used in class constructor.
+        Args:
+            chunk: The input bytes
+
+        Returns:
+
+        """
+        if f"<{self.tag}>".encode() in chunk:
+            return "start"
+        if f"</{self.tag}>".encode() in chunk:
+            return "end"
+
+    @staticmethod
+    def get_chunk_size(tag: str) -> int:
+        """
+        Return the number of bytes required to capture either start or end tag
+        Args:
+            tag: Tag to estimate size of
+
+        Returns: Number of bytes
+
+        """
+        start, end = [f"<{tag}>", f"</{tag}>"]
+        return max([len(_) for _ in (start, end)])
+
+    def yield_tag(self, start_pos: int = 0) -> bytes:
+        """
+        Yield chunks of bytes covering the tag within the XML
+        Args:
+            start_pos: Instead of starting from beginning start the byte position
+
+        Returns: Bytes including start and end tag
+
+        """
+        chunk_size = self.get_chunk_size(self.tag)
+        index_content = 0
+        with open(self.filename, "rb") as f:
+            pos = start_pos
+            while True:
+                pos += 1
+                f.seek(pos)
+                chunk = f.read(chunk_size)
+                if chunk == b"":
+                    break
+                if self.tag_in_chunk(chunk):
+                    if self.tag_in_chunk(chunk) == "start":
+                        index_content += 1
+                        pos = yield from self.yield_content_until_end(
+                            chunk,
+                            chunk_size,
+                            f,
+                            index_content,
+                            pos,
+                        )
+
+    def get_next_chunk_without_end(
+        self, f: BinaryIO, pos: int, big_chunk: int = 1_000, margin: int = 10
+    ) -> Tuple[int, Union[int, bytes]]:
+        """
+        Returns chunk of bytes that doesn't have a end-tag within the big_chunk size
+        Args:
+            f: File-pointer
+            pos: Byte pointer
+            big_chunk: Size in bytes to check
+            margin: The margin at end excluded to avoid miss-match on end-tag
+
+        Returns:
+
+        """
+        f.seek(pos)
+        read_chunk_excluding_margin = f.read(big_chunk)[:-margin]
+        if not self.tag_in_chunk(read_chunk_excluding_margin):
+            pos += big_chunk - margin
+            return pos, read_chunk_excluding_margin
+        else:
+            return pos, None
+
+    def yield_content_until_end(
+        self, chunk: bytes, chunk_size: int, f: BinaryIO, index_content: int, pos: int
+    ) -> List[Annotated[int, "Start byte"], Annotated[int, "End byte"], bytes]:
+        """
+        Yields bytes until end tag reached
+        Args:
+            chunk: Current chunk
+            chunk_size: Size in bytes to iterate
+            f: Input file-pointer
+            index_content: Current index number of content recovered
+            pos: Current byte position
+
+        Returns: List [start position, end position, bytes]
+
+        """
+        result = b""
+        abort = False
+        current_pos = 0
+        last_megabyte_progress = 0
+        start_pos = pos
+        while self.tag_in_chunk(chunk) != "end":
+            pos += 1
+            # break if no data left
+            if f.read(1) == "":
+                break
+            # get next big chunks without end-tag
+            while True:
+                pos, big_chunk_with_no_end = self.get_next_chunk_without_end(f, pos)
+                # print(f"pos: {pos}, big_chunk_with_no_end: {big_chunk_with_no_end}")
+                if big_chunk_with_no_end:
+                    result += big_chunk_with_no_end
+                    current_pos += len(big_chunk_with_no_end)
+                    new_megabyte_progress = int(round(current_pos, -6) / 1_000_000)
+                    if new_megabyte_progress is not last_megabyte_progress:
+                        print(
+                            f"processing current content {index_content}: {new_megabyte_progress} MB"
+                        )
+                        last_megabyte_progress = new_megabyte_progress
+                else:
+                    break
+                if last_megabyte_progress >= self.max_size_mb:
+                    print(f"Exceeding max size of {self.max_size_mb}, breaking")
+                    self.exceed_max += 1
+                    abort = True
+                    break
+            # carry on byte per byte now when end tag discovered
+            f.seek(pos)
+            result += f.read(1)
+            chunk = f.read(chunk_size)
+            if (
+                self.tag_in_chunk(chunk) == "start"
+                and f"</{self.tag}>" not in result.decode()
+            ):
+                print("Found new start, ending previous")
+                end_tag = f"</{self.tag}>".encode()
+                yield start_pos, pos + len(end_tag), result + end_tag
+                pos -= 1
+                abort = True
+                self.new_start += 1
+                break
+            if abort:
+                break
+        if not abort:
+            # Return start, end and chunk
+            yield start_pos + 1, pos + len(chunk) + 1, result + chunk
+        return pos

From 23d04ed2132fae6478a4fec0670b5c174da2736b Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Fri, 14 May 2021 18:10:59 +0200
Subject: [PATCH 02/10] Integrating hugexml parser and techniques allowing
 recovery of large Enex files.

---
 evernote_to_sqlite/cli.py | 109 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 2 deletions(-)

diff --git a/evernote_to_sqlite/cli.py b/evernote_to_sqlite/cli.py
index 5c720f3..6bd712e 100644
--- a/evernote_to_sqlite/cli.py
+++ b/evernote_to_sqlite/cli.py
@@ -2,12 +2,27 @@
 import click
 import os
 from .utils import find_all_tags, save_note, ensure_indexes
+import time
+import datetime as dt
+from .hugexmlparser import HugeXmlParser, read_recovery_file, update_recovery_file
+import logging
+from rich.logging import RichHandler
+from rich.progress import Progress
+import lxml
+
+FORMAT = "%(message)s"
+logging.basicConfig(
+    level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
+)
+logger = logging.getLogger(__name__)
+
+MEGABYTE = 1_000_000
 
 
 @click.group()
 @click.version_option()
 def cli():
-    "Tools for converting Evernote content to SQLite"
+    """Tools for converting Evernote content to SQLite"""
 
 
 @cli.command()
@@ -22,7 +37,7 @@ def cli():
     required=True,
 )
 def enex(db_path, enex_file):
-    "Convert Evernote .enex exports to SQLite"
+    """Convert Evernote .enex exports to SQLite"""
     file_length = os.path.getsize(enex_file)
     fp = open(enex_file, "r", encoding="utf-8")
     db = sqlite_utils.Database(db_path)
@@ -31,3 +46,93 @@ def enex(db_path, enex_file):
             save_note(db, note)
     fp.close()
     ensure_indexes(db)
+
+
+@cli.command()
+@click.argument(
+    "db_path",
+    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
+    required=True,
+)
+@click.argument(
+    "enex_file",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False, allow_dash=False),
+    required=True,
+)
+@click.option(
+    "--max_note_size",
+    type=click.INT,
+    required=False,
+    default=30,
+    help="This maximum size on MB attempting to discover end-tag of recognised note before skipping to next.",
+)
+@click.option(
+    "--resume_file",
+    type=click.Path(),
+    required=False,
+    help="Allows resume where conversion was aborted/failed. File will be created if it does not exist and will register start, end byte in Enex file.",
+)
+def recover_enex(db_path, enex_file, max_note_size=30, resume_file=None):
+    """Use recover techniques allowing malformed Evernote exports to be transformed to SQLite and specficially useful for very large Enex file. Be warned that this takes a very long time for larges Enex files."""
+
+    with Progress() as progress:
+        task1 = progress.add_task("[red]Downloading...", total=1000)
+        task2 = progress.add_task("[green]Processing...", total=1000)
+        task3 = progress.add_task("[cyan]Cooking...", total=1000)
+
+        while not progress.finished:
+            progress.update(task1, advance=0.5)
+            progress.update(task2, advance=0.3)
+            progress.update(task3, advance=0.9)
+            progress.console.print(f"Working on job #{dt.datetime.now().isoformat()}")
+            time.sleep(0.01)
+
+    file_length = os.path.getsize(enex_file)
+    db = sqlite_utils.Database(db_path)
+    fp = open(enex_file, "r", encoding="utf-8")
+
+    records = read_recovery_file()
+    last_start = sorted(records)[-1][0] if records else 0
+    count = len(records) - 1
+    splitted = 0
+    content_escaped = 0
+
+    xml_parser = HugeXmlParser(enex_file)
+
+    for start_pos, end_pos, data in xml_parser.yield_tag(start_pos=last_start):
+        logger.info(
+            f"{count}: {round(len(data) / MEGABYTE, 1)} MB,"
+            f"recovered: {xml_parser.new_start}, exceed max size: {xml_parser.exceed_max}"
+        )
+        records.add((start_pos, end_pos))
+        update_recovery_file(records)
+        notes = []
+        try:
+            notes.append(etree.fromstring(data))
+        except lxml.etree.XMLSyntaxError as e:
+            logger.error(e)
+            logger.warning("potential multiple notes breaking these up")
+            splitted += 1
+            for data_chunk in xml_parser.split_multiple_tag_chunk(data):
+                try:
+                    data_chunk = etree.fromstring(data_chunk)
+                except lxml.etree.XMLSyntaxError as e:
+                    logger.debug(e)
+                    logger.warning("invalid xml, attempt to escaping content-tag")
+                    data_chunk = xml_parser.escape_single_tag(data_chunk, "content")
+                    content_escaped += 1
+                    data_chunk = lxml.etree.fromstring(data_chunk)
+                notes.append(data_chunk)
+        for note in notes:
+            save_note(db, note)
+            print(f"saved {count}")
+            count += 1
+    logger.info(f"Notes with new start generated: {xml_parser.new_start}")
+    logger.info(f"Notes that exceeded the maximum size: {xml_parser.exceed_max}")
+    logger.info(f"Notes that were found but required splitting: {splitted}")
+    logger.info(
+        f"Notes found where <content> tag required to be escaped: {content_escaped}"
+    )
+
+    fp.close()
+    ensure_indexes(db)

From 317c01d23025ff7f005af72e2c32e0e6577c0597 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Fri, 14 May 2021 18:11:13 +0200
Subject: [PATCH 03/10] Adding dependencies to requirements.txt

---
 requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..19a7e59
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+click~=8.0.0
+setuptools~=56.2.0
+rich~=10.2.0
+lxml~=4.6.3
\ No newline at end of file

From d04a3b7bc08c8e5e2317e0ab2139f0c84d1f3483 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 08:40:39 +0200
Subject: [PATCH 04/10] Adding support for progressbar while parsing large
 individual notes

---
 evernote_to_sqlite/hugexmlparser.py | 33 +++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/evernote_to_sqlite/hugexmlparser.py b/evernote_to_sqlite/hugexmlparser.py
index bdc7654..d72f96e 100644
--- a/evernote_to_sqlite/hugexmlparser.py
+++ b/evernote_to_sqlite/hugexmlparser.py
@@ -1,12 +1,14 @@
 import re
-from typing import Union, List, BinaryIO, Annotated, Tuple
+from typing import Union, List, BinaryIO, Tuple
+from typing_extensions import Annotated
+import pickle
 
 
 def read_recovery_file(fn="/tmp/records.pickle") -> set:
     try:
         with open(fn, "rb") as f:
             records = pickle.load(f)
-    except (FileNotFoundError, EOFError):
+    except (FileNotFoundError, EOFError, TypeError):
         records = set()
     return records
 
@@ -17,7 +19,7 @@ def update_recovery_file(records, fn="/tmp/records.pickle"):
 
 
 class HugeXmlParser:
-    def __init__(self, filename: str, tag: str = "note", max_size_mb: int = 30):
+    def __init__(self, filename: str, tag: str = "note", max_size_mb: int = 30, progress_bar=None):
         """
         Class for handling big malformed XML files
         Args:
@@ -30,6 +32,15 @@ def __init__(self, filename: str, tag: str = "note", max_size_mb: int = 30):
         self.filename = filename
         self.tag = tag
         self.max_size_mb = max_size_mb
+        self.progess_bar = progress_bar
+        if self.progess_bar:
+            self.note_progress = self.progess_bar.add_task("[red]Parsing note...", total=self.max_size_mb)
+
+    def print(self, text):
+        if self.progess_bar:
+            self.progess_bar.console.print(text)
+        else:
+            print(text)
 
     @staticmethod
     def split_and_strip(whole_chunk: Union[str, bytes], tag: str = "note") -> List:
@@ -158,9 +169,17 @@ def get_next_chunk_without_end(
         else:
             return pos, None
 
+    def update_progress_bar(self, current_megabyte):
+        if not self.progess_bar:
+            return
+        if current_megabyte > 0:
+            self.progess_bar.update(self.note_progress, completed=current_megabyte)
+        else:
+            self.progess_bar.reset(self.note_progress)
+
     def yield_content_until_end(
         self, chunk: bytes, chunk_size: int, f: BinaryIO, index_content: int, pos: int
-    ) -> List[Annotated[int, "Start byte"], Annotated[int, "End byte"], bytes]:
+    ) -> Tuple[Annotated[int, "Start byte"], Annotated[int, "End byte"], bytes]:
         """
         Yields bytes until end tag reached
         Args:
@@ -186,20 +205,20 @@ def yield_content_until_end(
             # get next big chunks without end-tag
             while True:
                 pos, big_chunk_with_no_end = self.get_next_chunk_without_end(f, pos)
-                # print(f"pos: {pos}, big_chunk_with_no_end: {big_chunk_with_no_end}")
                 if big_chunk_with_no_end:
                     result += big_chunk_with_no_end
                     current_pos += len(big_chunk_with_no_end)
                     new_megabyte_progress = int(round(current_pos, -6) / 1_000_000)
                     if new_megabyte_progress is not last_megabyte_progress:
-                        print(
+                        self.print(
                             f"processing current content {index_content}: {new_megabyte_progress} MB"
                         )
+                        self.update_progress_bar(new_megabyte_progress)
                         last_megabyte_progress = new_megabyte_progress
                 else:
                     break
                 if last_megabyte_progress >= self.max_size_mb:
-                    print(f"Exceeding max size of {self.max_size_mb}, breaking")
+                    self.print(f"Exceeding max size of {self.max_size_mb}, breaking")
                     self.exceed_max += 1
                     abort = True
                     break

From 7883f3dc436935e5b60e94e08406b94b712c8c3d Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 08:42:02 +0200
Subject: [PATCH 05/10] Changes made to recover_enex to support parsing of
 large ENEX files

---
 evernote_to_sqlite/cli.py | 132 ++++++++++++++++++++++----------------
 1 file changed, 78 insertions(+), 54 deletions(-)

diff --git a/evernote_to_sqlite/cli.py b/evernote_to_sqlite/cli.py
index 6bd712e..2f03dfd 100644
--- a/evernote_to_sqlite/cli.py
+++ b/evernote_to_sqlite/cli.py
@@ -1,14 +1,19 @@
 import sqlite_utils
 import click
 import os
-from .utils import find_all_tags, save_note, ensure_indexes
-import time
-import datetime as dt
-from .hugexmlparser import HugeXmlParser, read_recovery_file, update_recovery_file
 import logging
 from rich.logging import RichHandler
 from rich.progress import Progress
 import lxml
+from lxml import etree
+import sys
+try:
+    from .utils import find_all_tags, save_note, save_note_recovery, ensure_indexes, human_size
+    from .hugexmlparser import HugeXmlParser, read_recovery_file, update_recovery_file
+except ModuleNotFoundError:
+    # workaround for PyCharm
+    from utils import find_all_tags, save_note, save_note_recovery, ensure_indexes, human_size
+    from hugexmlparser import HugeXmlParser, read_recovery_file, update_recovery_file
 
 FORMAT = "%(message)s"
 logging.basicConfig(
@@ -25,6 +30,7 @@ def cli():
     """Tools for converting Evernote content to SQLite"""
 
 
+# noinspection SpellCheckingInspection
 @cli.command()
 @click.argument(
     "db_path",
@@ -70,69 +76,87 @@ def enex(db_path, enex_file):
     "--resume_file",
     type=click.Path(),
     required=False,
-    help="Allows resume where conversion was aborted/failed. File will be created if it does not exist and will register start, end byte in Enex file.",
+    help="Allows resume where conversion was aborted/failed."
+         "File will be created if it does not exist and will register start, end byte in Enex file.",
 )
 def recover_enex(db_path, enex_file, max_note_size=30, resume_file=None):
-    """Use recover techniques allowing malformed Evernote exports to be transformed to SQLite and specficially useful for very large Enex file. Be warned that this takes a very long time for larges Enex files."""
+    """Use recover techniques allowing malformed Evernote exports to be transformed to SQLite
+    and specifically useful for very large Enex file. Be warned that this takes
+    a very long time for larges Enex files."""
 
-    with Progress() as progress:
-        task1 = progress.add_task("[red]Downloading...", total=1000)
-        task2 = progress.add_task("[green]Processing...", total=1000)
-        task3 = progress.add_task("[cyan]Cooking...", total=1000)
-
-        while not progress.finished:
-            progress.update(task1, advance=0.5)
-            progress.update(task2, advance=0.3)
-            progress.update(task3, advance=0.9)
-            progress.console.print(f"Working on job #{dt.datetime.now().isoformat()}")
-            time.sleep(0.01)
+    # with Progress() as progress:
+    #     task1 = progress.add_task("[red]Downloading...", total=1000)
+    #     task2 = progress.add_task("[green]Processing...", total=1000)
+    #     task3 = progress.add_task("[cyan]Cooking...", total=1000)
+    #
+    #     while not progress.finished:
+    #         progress.update(task1, advance=0.5)
+    #         progress.update(task2, advance=0.3)
+    #         progress.update(task3, advance=0.9)
+    #         progress.console.print(f"Working on job #{dt.datetime.now().isoformat()}")
+    #         time.sleep(0.01)
 
     file_length = os.path.getsize(enex_file)
     db = sqlite_utils.Database(db_path)
     fp = open(enex_file, "r", encoding="utf-8")
 
-    records = read_recovery_file()
-    last_start = sorted(records)[-1][0] if records else 0
+    records = read_recovery_file(resume_file)
+    current_position = sorted(records)[-1][0] if records else 0
     count = len(records) - 1
     splitted = 0
     content_escaped = 0
 
-    xml_parser = HugeXmlParser(enex_file)
+    with Progress() as progress:
+        all_tasks = progress.add_task(f"[red]Processing Evernote export file {human_size(file_length)}...", total=file_length)
+        xml_parser = HugeXmlParser(enex_file, max_size_mb=max_note_size, progress_bar=progress)
+
+        while not progress.finished:
+            try:
+                start_pos, end_pos, data = next(xml_parser.yield_tag(start_pos=current_position))
+            except StopIteration:
+                break
+
+            progress.update(all_tasks, completed=end_pos)
+            current_position = end_pos
 
-    for start_pos, end_pos, data in xml_parser.yield_tag(start_pos=last_start):
+            progress.console.print(
+                f"{count}: {round(len(data) / MEGABYTE, 1)} MB,"
+                f"recovered: {xml_parser.new_start}, exceed max size: {xml_parser.exceed_max}"
+            )
+            records.add((start_pos, end_pos))
+            if resume_file:
+                update_recovery_file(records, resume_file)
+            notes = []
+            try:
+                notes.append(lxml.etree.fromstring(data))
+            except lxml.etree.XMLSyntaxError as e:
+                progress.console.print(e)
+                progress.console.print("potential multiple notes breaking these up")
+                splitted += 1
+                for data_chunk in xml_parser.split_multiple_tag_chunk(data):
+                    try:
+                        data_chunk = lxml.etree.fromstring(data_chunk)
+                    except lxml.etree.XMLSyntaxError as e:
+                        progress.console.print(e)
+                        progress.console.print("invalid xml, attempt to escaping content-tag")
+                        data_chunk = xml_parser.escape_single_tag(data_chunk, "content")
+                        content_escaped += 1
+                        data_chunk = lxml.etree.fromstring(data_chunk)
+                    notes.append(data_chunk)
+            for note in notes:
+                save_note_recovery(db, note)
+                count += 1
+
+        logger.info(f"Notes with new start generated: {xml_parser.new_start}")
+        logger.info(f"Notes that exceeded the maximum size: {xml_parser.exceed_max}")
+        logger.info(f"Notes that were found but required splitting: {splitted}")
         logger.info(
-            f"{count}: {round(len(data) / MEGABYTE, 1)} MB,"
-            f"recovered: {xml_parser.new_start}, exceed max size: {xml_parser.exceed_max}"
+            f"Notes found where <content> tag required to be escaped: {content_escaped}"
         )
-        records.add((start_pos, end_pos))
-        update_recovery_file(records)
-        notes = []
-        try:
-            notes.append(etree.fromstring(data))
-        except lxml.etree.XMLSyntaxError as e:
-            logger.error(e)
-            logger.warning("potential multiple notes breaking these up")
-            splitted += 1
-            for data_chunk in xml_parser.split_multiple_tag_chunk(data):
-                try:
-                    data_chunk = etree.fromstring(data_chunk)
-                except lxml.etree.XMLSyntaxError as e:
-                    logger.debug(e)
-                    logger.warning("invalid xml, attempt to escaping content-tag")
-                    data_chunk = xml_parser.escape_single_tag(data_chunk, "content")
-                    content_escaped += 1
-                    data_chunk = lxml.etree.fromstring(data_chunk)
-                notes.append(data_chunk)
-        for note in notes:
-            save_note(db, note)
-            print(f"saved {count}")
-            count += 1
-    logger.info(f"Notes with new start generated: {xml_parser.new_start}")
-    logger.info(f"Notes that exceeded the maximum size: {xml_parser.exceed_max}")
-    logger.info(f"Notes that were found but required splitting: {splitted}")
-    logger.info(
-        f"Notes found where <content> tag required to be escaped: {content_escaped}"
-    )
 
-    fp.close()
-    ensure_indexes(db)
+        fp.close()
+        ensure_indexes(db)
+
+
+if __name__ == '__main__':
+    cli(sys.argv[1:])

From bb65c5ec1a4bffd7275be39d00043bee89220448 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 08:42:47 +0200
Subject: [PATCH 06/10] Adding function to allow resuming of an already started
 recovery process

---
 evernote_to_sqlite/utils.py | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/evernote_to_sqlite/utils.py b/evernote_to_sqlite/utils.py
index 398cba7..7b6d96f 100644
--- a/evernote_to_sqlite/utils.py
+++ b/evernote_to_sqlite/utils.py
@@ -113,3 +113,49 @@ def resolve_entities(s):
     return _entities_re.sub(
         lambda m: html.entities.entitydefs.get(m.group(1), m.group(1)), s
     )
+
+
+def save_note_recovery(db, note):
+    title = note.find("title").text
+    created = note.find("created").text
+    if note.find("updated") is not None:
+        updated = note.find("updated").text
+    else:
+        updated = created
+    content = note.find("content").text
+    row = {
+        "title": title,
+        "content": content,
+        "created": convert_datetime(created),
+        "updated": convert_datetime(updated),
+    }
+    attributes = note.find("note-attributes")
+    if attributes is not None:
+        row.update({attribute.tag: attribute.text for attribute in attributes})
+    # If any of those attributes end in -date, e.g. 'subject-date', convert them
+    for key in row:
+        if key.endswith("-date"):
+            row[key] = convert_datetime(row[key])
+    note_id = db["notes"].insert(row, hash_id="id", replace=True, alter=True).last_pk
+    # Now do the resources
+    for resource in note.findall("resource"):
+        resource_id = save_resource(db, resource)
+        db["note_resources"].insert(
+            {
+                "note_id": note_id,
+                "resource_id": resource_id,
+            },
+            pk=("note_id", "resource_id"),
+            foreign_keys=("note_id", "resource_id"),
+            replace=True,
+        )
+
+
+def human_size(bytes, units=[" bytes", "KB", "MB", "GB", "TB", "PB", "EB"]):
+    return (
+        str(bytes) + units[0]
+        if bytes < 1024
+        else human_size(bytes >> 10, units[1:])
+        if units[1:]
+        else f"{bytes>>10}ZB"
+    )

From 27610cc999c293ab87284dff098c3ad560c5e94e Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 08:43:27 +0200
Subject: [PATCH 07/10] Adding help describing the recover-enex command

---
 README.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/README.md b/README.md
index 08ee34d..fcdeea4 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,56 @@ You can convert that file to SQLite like so:
 
 This will display a progress bar and create a SQLite database file called `evernote.db`.
 
+In situations where the ENEX file is malformed
+or notes have grown larger than the optimised XML parser can handle,
+you have the option to run in recovery mode, which uses methods
+that allow the process to carry on through all notes.
+
+    $ evernote-to-sqlite recover-enex evernote.db MyNotes.enex
+    
+If you have a very large file you can also supply a resume file that allows
+the process to resume where it left off in case of interruption.
+
+```shell script
+$ evernote-to-sqlite recover-enex --help                                                                   
+Usage: evernote-to-sqlite recover-enex [OPTIONS] DB_PATH ENEX_FILE
+
+  Use recover techniques allowing malformed Evernote exports to be transformed
+  to SQLite and specifically useful for very large Enex file. Be warned that
+  this takes a very long time for larges Enex files.
+
+Options:
+  --max_note_size INTEGER  This maximum size on MB attempting to discover end-
+                           tag of recognised note before skipping to next.
+  --resume_file PATH       Allows resume where conversion was
+                           aborted/failed.File will be created if it does not
+                           exist and will register start, end byte in Enex
+                           file.
+  --help                   Show this message and exit.
+
+$ evernote-to-sqlite recover-enex evernote.db MyNotes.enex --max_note_size 30 --resume_file my_resume_file
+
+...
+
+5763: 0.3 MB,recovered: 0, exceed max size: 16
+processing current content 1: 1 MB
+processing current content 1: 2 MB
+processing current content 1: 3 MB
+5764: 3.2 MB,recovered: 0, exceed max size: 16
+5765: 0.0 MB,recovered: 0, exceed max size: 16
+processing current content 1: 1 MB
+processing current content 1: 2 MB
+processing current content 1: 3 MB
+5766: 3.4 MB,recovered: 0, exceed max size: 16
+[07:22:40] INFO     Notes with new start generated: 0                                                                                                             cli.py:150
+[07:22:41] INFO     Notes that exceeded the maximum size: 16                                                                                                      cli.py:151
+           INFO     Notes that were found but required splitting: 51                                                                                              cli.py:152
+           INFO     Notes found where <content> tag required to be escaped: 7                                                                                     cli.py:154
+Processing Evernote export file 5GB... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 100% 0:00:01
+Parsing note...                        ━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━  10% 0:00:00
+```
+
+
 ### Limitations
 
 Unfortunately the ENEX export format does not include a unique identifier for each note. This means you cannot use this tool to re-import notes after they have been updated - you should consider this tool to be a one-time transformation of an ENEX file into an equivalent SQLite database.

From a47a5a868aa7d00146fc1ac910f513245cf4baf4 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 09:11:56 +0200
Subject: [PATCH 08/10] Adding tests for recover-enex

---
 tests/example-note_broken.enex   | 177 +++++++++++++++++++++++++++
 tests/test_evernote_to_sqlite.py | 197 +++++++++++++++++++++++++++++++
 2 files changed, 374 insertions(+)
 create mode 100644 tests/example-note_broken.enex

diff --git a/tests/example-note_broken.enex b/tests/example-note_broken.enex
new file mode 100644
index 0000000..583e92a
--- /dev/null
+++ b/tests/example-note_broken.enex
@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export3.dtd">
+<en-export export-date="20201011T235248Z" application="Evernote" version="Evernote Mac 7.14 (458265)">
+<note><title>Example note with images</title><content><![CDATA[<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>This note includes two images. &scaron;.</div><div><br /></div><div><span style="font-weight: bold;">The Python logo</span></div><div><br /></div><div><en-media hash="61098c2c541de7f0a907c301dd6542da" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div><span style="font-weight: bold;">The Evernote logo</span></div><div><br /></div><div><en-media hash="91bd26175acac0b2ffdb6efac199f8ca" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div>This image contains text:</div><div><br /></div><div><en-media hash="76dd28b07797cc9f3f129....BROKEN......]]></content><created>20201011T212822Z</created><updated>20201011T233038Z</updated><note-attributes><latitude>37.77742571705006</latitude><longitude>-122.4256495114116</longitude><altitude>23.16121864318848</altitude><author>Simon Willison</author><source>desktop.mac</source><reminder-order>0</reminder-order></note-attributes><resource><data encoding="base64">PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8v
+d3d3LnczLm9yZy8xOTk5L3hsaW5rIgphcmlhLWxhYmVsPSJQeXRob24iIHJvbGU9ImltZyIKdmlld0Jv
+eD0iMCAwIDUxMiA1MTIiPjxyZWN0CndpZHRoPSI1MTIiIGhlaWdodD0iNTEyIgpyeD0iMTUlIgpmaWxs
+PSIjZmZmIi8+PGcgZmlsbD0iIzVhOWZkNCI+PHBhdGggaWQ9InAiIGQ9Ik0yNTQgNjRjLTE2IDAtMzEg
+MS00NCA0LTM5IDctNDYgMjEtNDYgNDd2MzVoOTJ2MTJIMTMwYy0yNyAwLTUwIDE2LTU4IDQ2LTggMzUt
+OCA1NyAwIDkzIDcgMjggMjMgNDcgNDkgNDdoMzJ2LTQyYzAtMzAgMjYtNTcgNTctNTdoOTFjMjYgMCA0
+Ni0yMSA0Ni00NnYtODhjMC0yNC0yMS00My00Ni00Ny0xNS0zLTMyLTQtNDctNHptLTUwIDI4YzEwIDAg
+MTcgOCAxNyAxOCAwIDktNyAxNy0xNyAxNy05IDAtMTctOC0xNy0xNyAwLTEwIDgtMTggMTctMTh6Ii8+
+PC9nPjx1c2UgeGxpbms6aHJlZj0iI3AiIGZpbGw9IiNmZmQ0M2IiIHRyYW5zZm9ybT0icm90YXRlKDE4
+MCwyNTYsMjU1KSIvPjwvc3ZnPg==</data><mime>image/svg+xml</mime><width>0</width><height>0</height><duration>0</duration><resource-attributes><timestamp>19700101T000000Z</timestamp></resource-attributes></resource><resource><data encoding="base64">PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciCmFyaWEtbGFiZWw9IkV2ZXJub3Rl
+IiByb2xlPSJpbWciCnZpZXdCb3g9IjAgMCA1MTIgNTEyIj48cmVjdAp3aWR0aD0iNTEyIiBoZWlnaHQ9
+IjUxMiIKcng9IjE1JSIKZmlsbD0iIzQ2Yzg1MCIvPjxwYXRoIGQ9Im0xMjEgMTQzaDM1YzMgMCA0LTEg
+NC00bC0xLTM4YzAtMTAgNi0xOSA2LTE5aC0xbC02OCA2N3YxczEwLTcgMjUtN3ptMjcxLTZjLTMtMTUt
+MTItMjMtMjAtMjUtMzItOC02NS0xMi05OC0xMS0yLTE5LTE4LTI5LTU0LTI5LTMxLTEtNDkgNi00OSAy
+OXYzOWMwIDgtNSAxMy0xNCAxM2gtMzRjLTcgMC0xMyAyLTE4IDQtNCAyLTE0IDctMTQgMzAtMSAxOSAx
+MyA5NSAyMyAxMTUgMyA5IDYgMTIgMTQgMTUgMTYgOCA1NCAxNSA3MyAxOCAxNyAyIDI4IDYgMzYtOCAy
+LTQgMTAtMzAgOS01MiAwLTEgMi0yIDIgMCAwIDctMiAzNiAxOSA0M2w0NSA5YzE2IDEgMjggNyAyOCA0
+OSAwIDI1LTYgMjgtMzQgMjgtMjIgMC0zMCAxLTMwLTE3IDAtMTQgMTQtMTMgMjUtMTMgNCAwIDEtMyAx
+LTEyczUtMTQgMC0xNGMtMzYtMS01OCAwLTU4IDQ1IDAgNDIgMTYgNDkgNjggNDkgNDAgMCA1NS0xIDcx
+LTUyIDI1LTc4IDE4LTIwNSA5LTI1M3ptLTQ2IDExNWMtNS02LTMxLTgtNDAtNCAyLTEwIDYtMjIgMjIt
+MjIgMTUgMCAxOCAxNiAxOCAyNnoiIGZpbGw9IiM0YjRiNGIiLz48L3N2Zz4=</data><mime>image/svg+xml</mime><width>0</width><height>0</height><duration>0</duration><resource-attributes><timestamp>19700101T000000Z</timestamp></resource-attributes></resource><resource><data encoding="base64">iVBORw0KGgoAAAANSUhEUgAAAp4AAACACAIAAAAtV/4GAAAAAXNSR0IArs4c6QAAAGxlWElmTU0AKgAA
+AAgABAEaAAUAAAABAAAAPgEbAAUAAAABAAAARgEoAAMAAAABAAIAAIdpAAQAAAABAAAATgAAAAAAAACQ
+AAAAAQAAAJAAAAABAAKgAgAEAAAAAQAAAp6gAwAEAAAAAQAAAIAAAAAAcQXnGQAAAAlwSFlzAAAWJQAA
+FiUBSVIk8AAAAgppVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADx4OnhtcG1ldGEgeG1sbnM6eD0iYWRv
+YmU6bnM6bWV0YS8iIHg6eG1wdGs9IlhNUCBDb3JlIDUuNC4wIj4KICAgPHJkZjpSREYgeG1sbnM6cmRm
+PSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4KICAgICAgPHJkZjpE
+ZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIKICAgICAgICAgICAgeG1sbnM6ZXhpZj0iaHR0cDovL25zLmFk
+b2JlLmNvbS9leGlmLzEuMC8iCiAgICAgICAgICAgIHhtbG5zOnRpZmY9Imh0dHA6Ly9ucy5hZG9iZS5j
+b20vdGlmZi8xLjAvIj4KICAgICAgICAgPGV4aWY6UGl4ZWxYRGltZW5zaW9uPjY3MDwvZXhpZjpQaXhl
+bFhEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlBpeGVsWURpbWVuc2lvbj4xMjg8L2V4aWY6UGl4ZWxZ
+RGltZW5zaW9uPgogICAgICAgICA8dGlmZjpSZXNvbHV0aW9uVW5pdD4yPC90aWZmOlJlc29sdXRpb25V
+bml0PgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KEW+9
+lAAAIEJJREFUeAHtnWW0JDW3hi8wuLsP7q6Du7s7DO4M7u7u7izc7eKw8MHdfXB3d+7Dl+9u9kpV96nu
+ktNdvOfHTCoVfSrJTnZ20oP17dv3f/QnAiIgAiIgAiJQFwKD16UiqocIiIAIiIAIiMDfBCTa1Q5EQARE
+QAREoFYEJNpr9TlVGREQAREQARGQaFcbEAEREAEREIFaEZBor9XnVGVEQAREQAREQKJdbUAEREAEREAE
+akVAor1Wn1OVEQEREAEREAGJdrUBERABERABEagVAYn2Wn1OVUYEREAEREAEJNrVBkRABERABESgVgQk
+2mv1OVUZERABERABEZBoVxsQAREQAREQgVoRkGiv1edUZURABERABERAol1tQAREQAREQARqRUCivVaf
+U5URAREQAREQAYl2tQEREAEREAERqBUBifZafU5VRgREQAREQAQk2tUGREAEREAERKBWBCTaa/U5VRkR
+EAEREAERkGhXGxABERABERCBWhGQaK/V51RlREAEREAERECiXW1ABERABERABGpFQKK9Vp9TlREBERAB
+ERABiXa1AREQAREQARGoFQGJ9lp9TlVGBERABERABCTa1QZEQAREQAREoFYEJNpr9TlVGREQAREQARGQ
+aFcbEAEREAEREIFaEZBor9XnVGVEQAREQAREQKJdbUAEREAEREAEakVAor1Wn1OVEQEREAEREAGJdrUB
+ERABERABEagVAYn2Wn1OVUYEREAEREAEJNrVBkRABERABESgVgQk2mv1OVUZERABERABEZBoVxsQAREQ
+AREQgVoRkGiv1edUZURABERABERAol1tQAREQAREQARqRUCivVafU5URAREQAREQAYl2tQEREAEREAER
+qBUBifZafU5VRgREQAREQAQk2tUGREAEREAERKBWBCTaa/U5VRkREAEREAER6DPccMOJggiIgAiIgAiI
+QOEEDjv8UEtzrz33NnfZDq3ayyas9EVABERABESgUgIS7ZXiVmYiIAIiIAIiUDYBifayCSt9ERABERAB
+EaiUgER7pbiVmQiIgAiIgAiUTUCivWzCSl8EREAEREAEKiXQ58cff6w0Q2UmAiIgAiIgAv8+AlVKW63a
+/33tSzUWAREQARGoNQGJ9lp/XlVOBERABETg30egT2dWefnll19xxRUp26+//rrzzjv/9NNP+cu52267
+jTvuuKTz1FNPXXTRRfkTrCyFGWeccaONNiK7P//88/DDD//8888ry1oZiYAI1IzALrvsMvXUU1Opd999
+96CDDur22g055JB9+vTJKSOGGmooOCBuup2Glb940b7hhhvONttslkF2x++//77jjjuG8JNOOul4440X
+3GOMMcZ7772XPZ1GIeeYY47hhx+et2OPPXZ3ifZJJplkqqmmCvVCzN99992N6ij/9ghMN910W265Zca4
+e++997fffpsxsIKJQKcRQK6H0XXMMcfstLJlLM9ggw22zjrrLLHEEkgH3MT666+/kO6PPfbYOeec8+WX
+X2ZJZ9ppp11jjTVmmGGGYYYZxifCjOeSSy558sknfSKLL774Cius4H2Cm0wRT2+++eYLL7xAxGSA3vIp
+XrTPN998JpV7q1bKVwRaIjD99NNPNtlkGaNwN7NEe0ZW2YOtuuqqyy67bAg/YMCA77//Pnvc3g3JtHv3
+3XcPZTj//PMfeOCBKsvTvdzapoR8OeaYY0YZZRSfArKZjrnQQgshgE466aS77rrLv43crPH23XdflkmR
+f0iEqc/BBx/88ccfb7zxxhZg0UUXbTREMHqEYB988MFxxx338ssvW6xedBQv2nuxMspaBESgSwkwbo4z
+zjih8EE72i0VGXXUUa3k1a9qupdbe98X+X3yyScPO+ywjaKjnEf7+9VXX0XLbguPXD/33HNHGmkk80l1
+RFOH1DCR5/jjj3/sscfeeOONZ5xxRvSq+sfiRftrr702xBBDJGsy1lhjBaUHr7744gvU71GYP/74I/Ip
+9hGdycQTT0yahaj3iy1b89Qo8Ndffz344H/bPOJoHlhv2yDw4YcffvLJJxaRhkpztUf/Cs+ff/7ZXskh
+AiJQJYH999/fy/V33nnn0UcfZVSca665UK0HEcO/LMpXWWUVjJOSZTvllFO8XP/uu++QWcwDUK2zMYcO
+ZsIJJ0zG8j5ECQZPSDpmACOOOKKJNoKht6dITz/9tI9Svbt40X7UUUelVuPyyy83oLvuuivqjtRg5Xnu
+scce5SVeasrY/bGxVGoW//LEUaJ6PerQQw993XXXBSZ0+GDD+C9HpOqLQK8TQCmC/LZiPPjgg4cddlh4
+vP766+edd17sYMIjip/+/fuzP2KBg4Mw2FqZ53PPPeflwu23386raaaZZq+99mrym6hXXHHFtddea4mw
+6Np8880x/TYBv88++7BRYgF6xfH3QlB/IiACIiACItDhBNZbbz0r4WeffWZyPXgOHDjwpptusgBLLrmk
+uc2BDDY3Nnderps/m+Xrr7/+FltsYT7NHegG0MB702z0Ctj3NY9V9tviV+0llRhThbXXXpvTa0ym0Oe/
+/vrrbGm89dZbTbLr27fv5JNPngyA9oZ1cNI/6cO2zZprrolVBepZtC5sGWA/RfQnnnjitttuYz2XjFKI
+D7O/+eefn9yTqaE4+uabb5L+SR/mnkwkITbaaKOxDMUuiYjvv//+Lbfc8uqrrybDF+uDQSkqMibI7ERS
+HVRY7H6Fr0afbJ4X2h1sYYJpG58b4G+88cazzz5L1/3tt9+ax63BW/YCOc2BbpBzImziMnCg/cP49ppr
+rmligou183LLLcfO6+ijjz7yyCPTVjHqQdOI4S66wUbcQB1GQAyMb775Zpo0n4zlDumQCG1m0KBB9957
+L+QLB0sFMVG2ZMNmWXhEpfnDDz/Yq+CgeE0u82I9169fP9IkHdrMRx99xACNXvT555+P0kk+ttrNQY25
+lqUz0UQTmRvLrKQSmOECjBYmp6NYbqEwtIGlllqKgYKDZKB75ZVXLrvssiyWjHxBzkMxQjLYUnG2Dmlv
+KMBoezmrmRp95plnNn+apbnNgW6YXhAeadtoyxk97C3dyg4F0OA5SGyvko5WzxiTNfb2GNuHpGadddY7
+7rgjmWxlPinCo7K8s2fEkXTfl/hmnAdDeJx11lk33HBDo3T23HNP3+ssGB87i36bU3zs1kTylXGTdGaZ
+ZZbNNtuMFty8cViOrToY01Onk6SDaWiPh9/oZiimoh0joDH8Ie/hxvFNznqVtCfCpGSbbbYhO19r+hjl
+YeBDT8XWdRMV9wILLMC5W48d5kgaRh+Ge0yRm8/nfKbd6F5mmWX4NL761AICbAHy4fhkRx55ZDQzW331
+1VdbbTWmnlF9iQVwXiGwUTBGsUJg+pF9CyQiMz/gWzqhzaDD9JpPe5vTQbHnnHPO1EQYIpP+TGpTPz0T
+R8aHBRdc0EehvdHU6b/MCNl2TVr2WOA2ujkCxqBZOsHByMBf5MkkiRNZkWfbj0VxCwWgpVE2b1kS0NEO
+GYJS20yISNtg75WhxlcEwYlIY8mLnPOrWB+mbTdFpWwWPSjP7TE4OPbG/I+WHB4XWWQRrznfdNNNLTzT
+l19++cUeC3Gw5sSSLiRF8+td0d4FCnkaipfr9g3o0uhM2jtDb4k0ctAIGF+iETYKHDXr6G1vPdLlTjjh
+hEiuR4VhIyoc8Y/88z/SsRkRIrkeJRumR5FneNxpp52I3gg7ZcY4NtxllBq9qz1HGGEEDu1su+22japP
+7VjEzzPPPFE18UnKdR8G9SBnclI3/1i7WEjWXlwPZY/egRJl++239z4d4qadcwQ5kuu+bDPNNNOll17q
+t1f92+7t5r4Wedwnnniil+uWFHo+VhH8az7ewfz74osvbjQAMjKjYaUx+yj53czvLRGkMqav9ugdaKrs
+MdLaeiU5rcKCFeXw9uNlW4X3WOYuWLUzrFAN5t2YuPM5mRZNOeWUVjE6Z6NDDo8//rifl00xxRS0OYvY
+xIFkYr5vAVDCP/LII+ipGCJRAzCalH3EBc25b6DIwibi0MoZHJgomlKIgRu15IsvvohuDW6UnO5R6ski
+VtUGGSUwqkhWWrRy1KQTTDABa0R7GxWbR2Zpiy22mPljiM7MGrUYEemiQeARnd2ye+65p34nyw888EA/
+ElH9t99+mzbP90KLA4RG46wR43PTSlGKoLeHOa2UFWSIBbdNNtnkmWeeIUELHzmCtpNEiE7W7KSgbrV5
+Bud6mVcltc1RItkf2VBD/WvhWeVYu0Wpm9xB8JpVi8VZI6ZE9oi9NB0HpRTEmKmExkYAzKq33nprCxYc
+bXdzBhYato3j7HnZ4TfUKnyCKKMsmwJRlCaPhXAL6cOfLokbfRjcGCUYJK0uVHCDDTY4++yzo8IQwHdz
+Ggz9lHbF5/Pm5TRmmhzHzKLobT/SQixuk60Zf56FXmNRcFgDw91IavjwrbrZ0bAofgA3zyodXSDawYGi
+g9WM7TEjA7gfMfRbZC1amtRuf95553mUV111Vcalql8XRiaUIUEaNwKGzWOffoFu5NkOO+xgCVKe7DYd
+ZkFKl2OlxS61pRMc7EWRIEgj//yPTB1s+cjmOqrOyByBuRGDBf0/NS/U+ObPELnVVlvZzAzgp556KtEJ
+wHffbrvtDj30UAtcAwcy2I9cbBJzaYaXo1jh8jX91MdqzTDH36233spCyoiFt7A65JBDTEVMo7XLVSy6
+dyBQ2cpBsgZP9KvYBwXsyHhMN5rsf/l0srixd/EmL37T7YgjjshyoRgNzOQ64pwmgdC1rJHuRx99dCg8
+M0v2ifwhCIK13c0Zbfbbbz/LCJsSe0RLjPm0vSrDkZ9bVCqmKUx9GC6CP0ONkWEXLCnaaSFh7CU8djO0
+KL+1RxPlWHkIwALp6quvtnE7yrfVR9smJ2LUzn1SftLv1Ye4rdhNNmh8Ui252UezVRMwy5g6tFSeLlDI
+g4lB37cPqPlNIGaaLdW5x8Be9lx44YXJ8DRl5hYsYpKveteH8deaFxqOpFyneBiRYiiQOhnKWXibVZDO
+/fffH8l1PPE588wz0bonM0KTb8sFOh7KGN97AY44t1iooH2nNf/udaBrscLfeeedrOC9XOcVj8cffzwQ
+2Pa2kMHBSRs21FkeeWLhFX3ngAMOsBWwnz1EiYRH1LMm1/Fh4GZCbCEL72iWchsOpum2xUA1ETBerpMg
+6iJvsJK8SLh7u3kbuBpFCdMUAFoAeqjd3BAmRvYKB+YRphgnGJNFL9cJwDVwbLSHKIhS3219Om24bQed
+uMxlG6XgRbtXC1mxichEsFH0NvzZmGAy6q+hRctbxtqppbJ1gWhn3ey/VqieNyWLtC4t1T81sG/oya3N
+1Cgd4mnTUsrD/qJ/rKCEXhphx9tSjn49yprVJxXSYabCX3BTr7nnnrul9Ds5MOtLsw9iWnPaaac1Ki3i
+ilMG0VvfXKNXPCLXzVzZtOvJYPiglfXdKoSx8/08+q3K1BSq9FxppZVMJf7QQw/5ub4VA1bmD+GoO3hu
+3dXNrYL5Hak6BrsqFWKmFwl5sYlumWLTk5xN8hYDOpscFDgd9KI9eYDCSuUXgb7Bh62HEKxJdEuniQMj
+StRX/LE5woEjNFv+zlqaHGYKTaJX86oLFPKp1/rYaAUmM0osChn7Rmadx73WFKDXtSsZq8Y4zl+Yq9Ks
+UZ1hP9Vkhpsx2YzBXnrpJQuJCnTllVf2gsFepTpsyc7b1OEGfxYE6PND9EYmPKmJd7inX0yzKE8dLjNW
+gU8PGXap4GmSzBQ5JMI6LKlNCYmnnqyjMIjAkFR2g4+Mpc0TzC/CkPHYjYfUrNbBYfKbR3bfTWgRuHu7
+eR5uUdzUI2T+eCrNyfdrb5BIG2uEHdEeNrb99nOUdauPNpMjYhMjNS/OfTC/G8sg2WruPjwl8YWxV/Qs
+Dm2lmu5bmMocXSDaMY1J4vj000/N0xtHmGceB2sXmmwYGrBCYteTb4ZhEQKeWw5Stdx5sis2LuYbtmJm
+LcLqlo6KJR3qSv5yTlebFxXZAChT4qH2Z3rLJIwDSHBD+9Jki8uLDVCnZuT9C5/PpeZYjae3nrNVZktZ
+sxO/1lprsbXpVzapKTQR7ZFm1aLz1cJk0U8R7G1vOXwDYLebvx5LgrW8F+1d3c17rGyWAMx7/BrXonhD
+h2jV7h+xdbAojRyIQLp2Idpp0wSQlw0yyXy9aPdv/f3ivhY+TE43FpTFmkzmKU8XiPakxSkVtsl4nso3
+iouIwiiJk50WgJbE0oo/rkNipENSshr2c1sL2esOJiIoxGyTiQkKh1v4W3jhhSkbZUaJxOUnJZUTwyWO
+EYdZEVnQzVhB8sdGFJ8M2YxtY6oKxOzvCNZoBmAKeVKmRiVVofpkvQaiiQV7o4Khz6c1ZpS7qauNkHKj
+OzpK7WuNKtWjfxvLwWgN0NXdvEc+WQL4Ra0P3+iLM3FsJDh99MjNcrkQ0e61j02msNwiYAXwFfSXIkQt
+wcJndDCOsUwiMHoL5otm90NPRDnPae3UCVPGxIsK1gWivVE7KwpBajr8hADaAqalyekhjZvPiYjimsOH
+H344NXovemKXQNvCDosjgiZirTxYmXIiBWthb4Fvb/M7sB/hzhMuSPGr8JAshWG7i5kHWvqk2W2WIcOb
+/jXp2/lrUXEKNjSQbyP52qhIDGSRXGdxw6oLVjYUoojOskzplY7WqF49+vujgKl7dskUmJFHnt3bzaOK
+tPfY6hf3NurMvzOuUIuScxy6sWo2kc2+N/mFPtcxWXRb+ZhPSw52Bv1NOIx16OGDpGAc4x6z5EnLltIv
+JHAXiPZC6tlGIv/7nz+kOLeAYUzLBM2veHBjlMu1NsUaW7ZRzmQUpDvnT5gsc1qJg09se9uaOARG6hd7
+5NSXAZ0ncwv0peTO1kDyQDZ78Owoe9Uo0dnT7VG6+5V60rLSl6G73KhSTL3MCr6RYjy1Usw+bb1OU0TM
+czYhCokqxfZoolfd+8ghbLM9ZCLbdjfs3m5e/bfzFk5ITUx5qixDxmW3X7X7roRlLtORMMiwzCBYUXMO
+dBIDBgxAuoelFOMtN9qGZX2VfKK8usBCPipxxY/sE2PuyLYxggpVs9+wYUj1N9tUXLAes2NbnVMozD+4
+Bh+7VvTwXtFtNy33mE57ARgF0E1x9AVBzrkjlPB+icCZmShZ62Z0j6SmJAT2RyFSt2miNLvl0VuTeDve
+LOW3kwLg5VRhUq6TiJ8SZUmzK8J49QaXxuQsc/d285wVbyk6A4hNoZqsm1tKM3tgvxhAC9Vo5e0tV7x1
+DhmZHgv3uuuumz3rHkMy3HmDxE64ulGivcev9k8A5BP3OfjR09+L90+4znMhOBG0CFST7ugze1wlF1UP
+NjWZFbF2tAS9nW3w9CM1mhIL6R3YOtijV6+ZZ5c6vOmcHc3IWBfTx6CB98saH91Wt96z09x+5md6iCaF
+pFHZW3YczJ3f0Wo3b7Xk+UvoU6g4d5uCM3p4m3NfpJLcbDPZ8MUCIPV2Yfz9PM/fiUSp2C60sgXbI3vM
+7+BiXSsehiCpl6PnzyV7ChLt2Vn9N6TfJ/bKn5YTqjwC6ilvpcVNolUWgVmtHetKrsu9rg81Q2rBfG/0
+K93UwF3k6feA+b07U863VAVjG8Viz6jR+iYK2buPfm6H3WWPheFyUwvDBpC5i3Jk7+ZegcS2XVEFyJhO
+q9wyJtsomJ9So4VuFKwkfz95TRXtHJQIWnEKgKCNjDC4jccM65iXNPp1ovYKTwf0vweDore9dIqKJdGe
+QpL9ziYLndlnn93ieElpnr3r8FfCJUvip7SFX5SLaZvXhkW5Y95iq7GkHPK3nqELSYo3ZsFmC4ZWMHkp
+W5RdFz2y1+5vnObW0kYKFTaAuLbPV80OqXOfTNK0kPHL3zHiI3aa28/Vsqx4/G+8skji/okmNeLH67zK
+J4Qsqpt7aeevLmlSngJftcotZ9bcp2R6An7do8mvadCGaa7WZ3PmG6J72UlHiGijifTWwb5Pheh0Fn/Z
+MKpEu4M5Kt7SSy/ttYzR20aP3AhpcFg4ZWnGjZLK7y/RnsKQ66z5XSAaMZbk0Wuagr/qPPUcVxSlykdG
+K370E+tNNnsijQLtnuvEzXwU0w9THxVVQhaIaKW4yZxldyScWM3A0ybU0WyaAqBR4OB7KAnBTj/9dH+P
+FZaM/irW66+/PnldXZ5asK5FNmBOEf68IQKv/t/7v//bBCVPjlFcPo0NClzFn/xZLQCyjuT+3Wi3wn4M
+A+D8VorXkWI6RzrekjzKtKMe/S17/LAYdxM1Lzm4uHnXqkCv5PBFFAUa7KBx1BODr6QqqKhuTlO0HWiM
+pbnZt42DeVaRVh2tcms1/Sg82rX77rsveNJPaZNJlQnzcqanHIShuZotSJROe49c/Wb75eROr8FgLSRF
+r+G8g+1P4Zn603N+aoIpNCnwA992zSKdnaU8p58wEvJ3ImUsLVMHv+To3YV7bS3kL7jgAhNj4cOY3Qcy
+zx9d4C3Wnv4X3IMEwtCRX3znGmoOXSAIGUpoAT5NPLmJJeNXzx4MARL9GrQXk0xL+aUcn9qVV17pL23m
+FTXldx34Y3HM3hirc1o86/VQrxCXn23wiRTi5uIU0mFc69+/P0NzOIWFNR8TWH9sBpKpv6jIvZX2iz5U
+mfvM6SoUHuxemjKMpl7sn6cK/B7GxhtvnJoCJeGHavwrbF+95a1/1bYbtSrLUJtS0MywjaCm2IGTJo++
+DfhcaOfYhwcf9Ni0BMpGA2AFn9z18BE7zY2BKn8M0BSMhsoNPPzRVEyDSrP3++sEGzhwIGozG4IxU6Bf
+M/TT7ImF4q35TkSB3fy2226zK8SRDcyofMn5KKkNvpBP0Aa3nPkyo0Jgh1kUDFELMadnkIQ5wpJleqO2
+mjPfEB35zZwsuMmIxg9qVinRt2ZbPWotIQodClN2+7Utys9VJfxxPx3lNxnRdlGZOqDMCE2LcY9JattJ
+5YxY21U7soTv5P+MFNy9P24/17NgwUFgBlbslrmOwMt1mgIrY1pVFD7/IwftouL5roI7ettkiUD3wzqa
+BNGbhdYWise8mwuQ8xe1SQpkh2hh/o523ct1orC4ZDxKxkUmcSTU6xJIgZJ7uc5EATvwYpfslMQfa0wW
+rBofBgWmLL5FUXE+Ln++AUSFYZ7BDermSUVgztrR5DoNFfNvC9DJDi6K8F+fotKKqLv9JQtPH7RFZAjP
+Sh1iNLlorE/G9T45uznSwl+6EEpixU5ulPis87vb4JYnU1oUR2e98TmtjokUrY5/qXWexHuMi8lOdOMW
+3y761gh1fr6rUVIMfSgbfEcjJCkwrjaKkt2fpRQ/nWzhk0eB7FXZjupEOw2ipMr4ewkKyQJVEvYa0Shj
+KdMm2LDh7IS/jtHe9q6DXVsG+miU8UVC04D4RGnvPYtygwWluu3+JpNl94sVcPI3SCwkKaCxSF0Tg53E
+Ua54UxqLWA8HyzuUB+g8o3En1I7FKApJb5QQ/FEqsnZPba6DBg1CfeJ5Jq0csqMztXP2KC2FZNsY7S4q
+TS4tSBIwTaxPk0kejZllXJM2z54F0NAJ+Yi4C+zmFIMBAZ0BF2DzIaLCN+kRUZHae2yDW/aMmEwnAzP0
+cSsLE5pGTQICKFQwW/NHwpLptOfDljY/4JtaMOAjuZO/8hdlxKenozEcRV/KgjFxSTYYe5uatb31vwjK
+LNP8K3YMxu5sxVl2UXasd1l0snAMEzqGSMZKfvUydZTpqHqxUMDcj4PgHDNjUsUfjRWjYm90U16ByXTm
+mWdmCwCdAZo6egK58yMT3pq3ee4sWLEHJBHIM2qzN8+p1kb9sHlS3fiWhQh6ZqyEaH6MVtBjGDJbhNQa
+sUwHF8fA+Ogwx2ybH4ftwNlnauEL8WS9iK0czYa1I/oqZkis3oDWY2/t3m5eCLeciaAmoaFi2EFXpYdi
+VEhzBXsFvZUP169fP8xQULOjTmeIYG3QUr4s1rG0Z+81aBYpPApFjswVeCPWCSf+YxGyw/Y75qSdPbpE
+e3ZWCikCIiACIiACLRDoLdFenUK+BRgKKgIiIAIiIAIi0C4BifZ2ySmeCIiACIiACHQkAYn2jvwsKpQI
+iIAIiIAItEtAor1dcoonAiIgAiIgAh1JQKK9Iz+LCiUCIiACIiAC7RKQaG+XnOKJgAiIgAiIQEcSkGjv
+yM+iQomACIiACIhAuwQk2tslp3giIAIiIAIi0JEE+pR9v3FH1lqFEgEREAEREIFKCVQpbbVqr/TTKjMR
+EAEREAERKJuARHvZhJW+CIiACIiACFRKQKK9UtzKTAREQAREQATKJiDRXjZhpS8CIiACIiAClRKQaK8U
+tzITAREQAREQgbIJ6Eddyyas9EVABERABESgUgJatVeKW5mJgAiIgAiIQNkEJNrLJqz0RUAEREAERKBS
+AhLtleJWZiIgAiIgAiJQNgGJ9rIJK30REAEREAERqJSARHuluJWZCIiACIiACJRNQKK9bMJKXwREQARE
+QAQqJSDRXiluZSYCIiACIiACZROQaC+bsNIXAREQAREQgUoJSLRXiluZiYAIiIAIiEDZBCTayyas9EVA
+BERABESgUgIS7ZXiVmYiIAIiIAIiUDYBifayCSt9ERABERABEaiUgER7pbiVmQiIgAiIgAiUTUCivWzC
+Sl8EREAEREAEKiUg0V4pbmUmAiIgAiIgAmUTkGgvm7DSFwEREAEREIFKCUi0V4pbmYmACIiACIhA2QQk
+2ssmrPRFQAREQAREoFICEu2V4lZmIiACIiACIlA2AYn2sgkrfREQAREQARGolIBEe6W4lZkIiIAIiIAI
+lE1Aor1swkpfBERABERABColINFeKW5lJgIiIAIiIAJlE5BoL5uw0hcBERABERCBSglItFeKW5mJgAiI
+gAiIQNkEJNrLJqz0RUAEREAERKBSAhLtleJWZiIgAiIgAiJQNgGJ9rIJK30REAEREAERqJSARHuluJWZ
+CIiACIiACJRNQKK9bMJKXwREQAREQAQqJSDRXiluZSYCIiACIiACZRP4P6721h1vhDZmAAAAAElFTkSu
+QmCC</data><mime>image/png</mime><width>670</width><height>128</height><duration>0</duration><recognition><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!DOCTYPE recoIndex PUBLIC "SYSTEM" "http://xml.evernote.com/pub/recoIndex.dtd">
+<recoIndex docType="unknown" objType="image" objID="76dd28b07797cc9f3f129c4871c5293c" engineVersion="7.0.24.1" recoType="service" lang="en" objWidth="670" objHeight="128"><item x="26" y="52" w="81" h="29"><t w="76">This</t></item><item x="134" y="52" w="35" h="29"><t w="79">is</t></item><item x="196" y="60" w="37" h="21"><t w="73">so</t></item><item x="300" y="60" w="57" h="21"><t w="71">can</t></item><item x="382" y="54" w="79" h="27"><t w="77">test</t></item><item x="486" y="52" w="59" h="29"><t w="82">the</t></item><item x="570" y="54" w="59" h="25"><t w="74">OCR</t><t w="33">DeR</t><t w="15">OCR!</t><t w="14">OCR]</t></item></recoIndex>]]></recognition><resource-attributes><timestamp>19700101T000000Z</timestamp><reco-type>unknown</reco-type><file-name>Untitled-1.png</file-name></resource-attributes></resource></note>
+</en-export>
diff --git a/tests/test_evernote_to_sqlite.py b/tests/test_evernote_to_sqlite.py
index eb9666e..22c3a51 100644
--- a/tests/test_evernote_to_sqlite.py
+++ b/tests/test_evernote_to_sqlite.py
@@ -4,6 +4,7 @@
 import pathlib
 
 example_enex = pathlib.Path(__file__).parent / "example-note.enex"
+example_broken_enex = pathlib.Path(__file__).parent / "example-note_broken.enex"
 
 
 def test_version():
@@ -110,3 +111,199 @@ def test_enex(tmpdir):
     ]
     # Check we enabled Porter stemming
     assert "tokenize='porter'" in db["notes_fts"].schema
+
+
+def test_recover_proper_enex(tmpdir):
+    output = str(tmpdir / "output.db")
+    result = CliRunner().invoke(
+        cli, ["recover-enex", output, str(example_enex)], catch_exceptions=False
+    )
+    assert 0 == result.exit_code
+    db = sqlite_utils.Database(output)
+    assert set(db.table_names()) == {
+        "notes",
+        "resources",
+        "resources_data",
+        "note_resources",
+        "notes_fts_idx",
+        "notes_fts",
+        "notes_fts_config",
+        "notes_fts_docsize",
+        "notes_fts_data",
+        "resources_fts_config",
+        "resources_fts",
+        "resources_fts_idx",
+        "resources_fts_data",
+        "resources_fts_docsize",
+    }
+    assert list(db["notes"].rows) == [
+        {
+            "id": "e2d3f11777001291c06f20a1de05772fe0ba5a2c",
+            "title": "Example note with images",
+            "content": '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>This note includes two images. &scaron;.</div><div><br /></div><div><span style="font-weight: bold;">The Python logo</span></div><div><br /></div><div><en-media hash="61098c2c541de7f0a907c301dd6542da" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div><span style="font-weight: bold;">The Evernote logo</span></div><div><br /></div><div><en-media hash="91bd26175acac0b2ffdb6efac199f8ca" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div>This image contains text:</div><div><br /></div><div><en-media hash="76dd28b07797cc9f3f129c4871c5293c" type="image/png" /></div><div><br /></div></en-note>',
+            "created": "2020-10-11T21:28:22",
+            "updated": "2020-10-11T23:30:38",
+            "latitude": "37.77742571705006",
+            "longitude": "-122.4256495114116",
+            "altitude": "23.16121864318848",
+            "author": "Simon Willison",
+            "source": "desktop.mac",
+            "reminder-order": "0",
+        }
+    ]
+
+    assert list(db["resources"].rows) == [
+        {
+            "md5": "61098c2c541de7f0a907c301dd6542da",
+            "mime": "image/svg+xml",
+            "width": "0",
+            "height": "0",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": None,
+            "reco-type": None,
+            "file-name": None,
+        },
+        {
+            "md5": "91bd26175acac0b2ffdb6efac199f8ca",
+            "mime": "image/svg+xml",
+            "width": "0",
+            "height": "0",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": None,
+            "reco-type": None,
+            "file-name": None,
+        },
+        {
+            "md5": "76dd28b07797cc9f3f129c4871c5293c",
+            "mime": "image/png",
+            "width": "670",
+            "height": "128",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": "This is so can test the OCR",
+            "reco-type": "unknown",
+            "file-name": "Untitled-1.png",
+        },
+    ]
+    resource_md5s = [rd["md5"] for rd in db["resources_data"].rows]
+    assert resource_md5s == [
+        "61098c2c541de7f0a907c301dd6542da",
+        "91bd26175acac0b2ffdb6efac199f8ca",
+        "76dd28b07797cc9f3f129c4871c5293c",
+    ]
+    assert list(db["note_resources"].rows) == [
+        {
+            "note_id": "e2d3f11777001291c06f20a1de05772fe0ba5a2c",
+            "resource_id": "61098c2c541de7f0a907c301dd6542da",
+        },
+        {
+            "note_id": "e2d3f11777001291c06f20a1de05772fe0ba5a2c",
+            "resource_id": "91bd26175acac0b2ffdb6efac199f8ca",
+        },
+        {
+            "note_id": "e2d3f11777001291c06f20a1de05772fe0ba5a2c",
+            "resource_id": "76dd28b07797cc9f3f129c4871c5293c",
+        },
+    ]
+    # Check we enabled Porter stemming
+    assert "tokenize='porter'" in db["notes_fts"].schema
+
+
+def test_recover_broken_enex(tmpdir):
+    output = str(tmpdir / "output.db")
+    result = CliRunner().invoke(
+        cli, ["recover-enex", output, str(example_broken_enex)], catch_exceptions=False
+    )
+    assert 0 == result.exit_code
+    db = sqlite_utils.Database(output)
+    assert set(db.table_names()) == {
+        "notes",
+        "resources",
+        "resources_data",
+        "note_resources",
+        "notes_fts_idx",
+        "notes_fts",
+        "notes_fts_config",
+        "notes_fts_docsize",
+        "notes_fts_data",
+        "resources_fts_config",
+        "resources_fts",
+        "resources_fts_idx",
+        "resources_fts_data",
+        "resources_fts_docsize",
+    }
+    assert list(db["notes"].rows) == [
+        {
+            "id": "0c59e90500da181d5518ec94c68956f23bfd79c2",
+            "title": "Example note with images",
+            "content": '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>This note includes two images. &scaron;.</div><div><br /></div><div><span style="font-weight: bold;">The Python logo</span></div><div><br /></div><div><en-media hash="61098c2c541de7f0a907c301dd6542da" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div><span style="font-weight: bold;">The Evernote logo</span></div><div><br /></div><div><en-media hash="91bd26175acac0b2ffdb6efac199f8ca" type="image/svg+xml" width="125" /><br /></div><div><br /></div><div>This image contains text:</div><div><br /></div><div><en-media hash="76dd28b07797cc9f3f129....BROKEN......',
+            "created": "2020-10-11T21:28:22",
+            "updated": "2020-10-11T23:30:38",
+            "latitude": "37.77742571705006",
+            "longitude": "-122.4256495114116",
+            "altitude": "23.16121864318848",
+            "author": "Simon Willison",
+            "source": "desktop.mac",
+            "reminder-order": "0",
+        }
+    ]
+
+    assert list(db["resources"].rows) == [
+        {
+            "md5": "61098c2c541de7f0a907c301dd6542da",
+            "mime": "image/svg+xml",
+            "width": "0",
+            "height": "0",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": None,
+            "reco-type": None,
+            "file-name": None,
+        },
+        {
+            "md5": "91bd26175acac0b2ffdb6efac199f8ca",
+            "mime": "image/svg+xml",
+            "width": "0",
+            "height": "0",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": None,
+            "reco-type": None,
+            "file-name": None,
+        },
+        {
+            "md5": "76dd28b07797cc9f3f129c4871c5293c",
+            "mime": "image/png",
+            "width": "670",
+            "height": "128",
+            "duration": "0",
+            "timestamp": "19700101T000000Z",
+            "ocr": "This is so can test the OCR",
+            "reco-type": "unknown",
+            "file-name": "Untitled-1.png",
+        },
+    ]
+    resource_md5s = [rd["md5"] for rd in db["resources_data"].rows]
+    assert resource_md5s == [
+        "61098c2c541de7f0a907c301dd6542da",
+        "91bd26175acac0b2ffdb6efac199f8ca",
+        "76dd28b07797cc9f3f129c4871c5293c",
+    ]
+    assert list(db["note_resources"].rows) == [
+        {
+            "note_id": "0c59e90500da181d5518ec94c68956f23bfd79c2",
+            "resource_id": "61098c2c541de7f0a907c301dd6542da",
+        },
+        {
+            "note_id": "0c59e90500da181d5518ec94c68956f23bfd79c2",
+            "resource_id": "91bd26175acac0b2ffdb6efac199f8ca",
+        },
+        {
+            "note_id": "0c59e90500da181d5518ec94c68956f23bfd79c2",
+            "resource_id": "76dd28b07797cc9f3f129c4871c5293c",
+        },
+    ]
+    # Check we enabled Porter stemming
+    assert "tokenize='porter'" in db["notes_fts"].schema

From 4b6349cb805398e4ba5e6555786b842044f8d790 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 09:22:10 +0200
Subject: [PATCH 09/10] Remove commented-out progress bar example from recover_enex

---
 evernote_to_sqlite/cli.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/evernote_to_sqlite/cli.py b/evernote_to_sqlite/cli.py
index 2f03dfd..931aa72 100644
--- a/evernote_to_sqlite/cli.py
+++ b/evernote_to_sqlite/cli.py
@@ -84,18 +84,6 @@ def recover_enex(db_path, enex_file, max_note_size=30, resume_file=None):
     and specifically useful for very large Enex file. Be warned that this takes
     a very long time for larges Enex files."""
 
-    # with Progress() as progress:
-    #     task1 = progress.add_task("[red]Downloading...", total=1000)
-    #     task2 = progress.add_task("[green]Processing...", total=1000)
-    #     task3 = progress.add_task("[cyan]Cooking...", total=1000)
-    #
-    #     while not progress.finished:
-    #         progress.update(task1, advance=0.5)
-    #         progress.update(task2, advance=0.3)
-    #         progress.update(task3, advance=0.9)
-    #         progress.console.print(f"Working on job #{dt.datetime.now().isoformat()}")
-    #         time.sleep(0.01)
-
     file_length = os.path.getsize(enex_file)
     db = sqlite_utils.Database(db_path)
     fp = open(enex_file, "r", encoding="utf-8")

From a5839dadaa43694f208ad74a53670cebbe756956 Mon Sep 17 00:00:00 2001
From: Daniel Engvall <daniel@engvalls.eu>
Date: Sat, 15 May 2021 21:57:44 +0200
Subject: [PATCH 10/10] Add rich, lxml and typing_extensions to install_requires

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f0ce878..cbbab48 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ def get_long_description():
         [console_scripts]
         evernote-to-sqlite=evernote_to_sqlite.cli:cli
     """,
-    install_requires=["click", "sqlite-utils>=3.0"],
+    install_requires=["click", "sqlite-utils>=3.0", "rich~=10.2.0", "lxml~=4.6.3", "typing_extensions"],
     extras_require={"test": ["pytest"]},
     tests_require=["evernote-to-sqlite[test]"],
 )