Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
obar1 committed Apr 4, 2024
1 parent 520d013 commit c291174
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 83 deletions.
10 changes: 7 additions & 3 deletions demo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ function setup0to100_sb {
cp ./zero_to_one_hundred/tests_sb/resources/map.yaml .

# safari books from lorenzodifuccia
git clone https://github.com/lorenzodifuccia/safaribooks.git
pip install --quiet -r safaribooks/requirements.txt
# git clone https://github.com/lorenzodifuccia/safaribooks.git
# pip install --quiet -r safaribooks/requirements.txt
}

function 0to100 {
Expand Down Expand Up @@ -54,12 +54,16 @@ function 0to100_sb {
url=https://learning.oreilly.com/library/view/the-pragmatic-programmer/9780135956977/
./main_sb.py snatch_book "$url"

echo 'add any metadata you like'
echo '{"title": "The Pragmatic Programmer: your journey to mastery, 20th Anniversary Edition, 2nd Edition"}'> 9780135956977/9780135956977.json
./main_sb.py refresh_metadata "$url"

url=https://learning.oreilly.com/library/view/rewire-your-brain/9781119895947/
./main_sb.py snatch_book "$url"

url=https://learning.oreilly.com/library/view/rewire-your-brain/9781119895947/
echo 'pretend book was read fully :P'
echo '{"page_curr": "1", "pages_tot": "1"}' > 9781119895947/9781119895947.json
echo '{"page_curr": "1", "page_tot": "1"}' > 9781119895947/9781119895947.json
./main_sb.py refresh_metadata "$url"

./main_sb.py refresh_toc
Expand Down
18 changes: 7 additions & 11 deletions zero_to_one_hundred/models/meta_book.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from zero_to_one_hundred.views.markdown_renderer import MarkdownRenderer


class MetaBook(MarkdownRenderer):
class MetaBook:
epub_suffix = ".epub"
HTTP_OREILLY_COVER = "https://learning.oreilly.com/library/cover"
HTTP_OREILLY_LIBRARY = "https://learning.oreilly.com/library/"
Expand Down Expand Up @@ -41,8 +41,6 @@ def __init__(
self.path_pdf_as_md = self.path_as_md(f"./{self.isbn}/{self.isbn}.pdf")
self.path_img_as_md = self.path_as_md(f"./{self.isbn}/{self.isbn}.png")

def asMarkDown(self):
return f"MetaBook {self.http_url}, {self.isbn} {self.contents_path}"

@classmethod
def build_from_dir(
Expand All @@ -65,7 +63,6 @@ def write_img(self):
)

def write_epub(self):
try:
if self.config_map.get_download_books:
self.persist_fs.write_fake_epub(self.path_epub)
self.process_fs.write_epub(self.config_map, self.path_epub, self.isbn)
Expand All @@ -74,10 +71,8 @@ def write_epub(self):
print(
f"DDD skipping get_download_books {self.config_map.get_download_books}"
)
except Exception as e:
Validator.print_DDD(e)

def write_json(self):
def write_metadata(self):
self.metadata.write_json()

@classmethod
Expand All @@ -101,18 +96,18 @@ def write(self):
except Exception as e:
Validator.print_DDD(e)
try:
self.metadata.write_json()
self.write_metadata()
except Exception as e:
Validator.print_DDD(e)
try:
self.write_pdf(self.path_epub)
except Exception as e:
Validator.print_DDD(e)
try:
self.write_splitter_pdf(self.path_pdf, self.config_map.get_split_pdf_pages)
except Exception as e:
Validator.print_DDD(e)

def read_json(self):
return self.metadata.read_json()

@classmethod
def get_isbn(cls, http_url):
http_url = http_url.strip("/")
Expand All @@ -138,3 +133,4 @@ def path_as_md(self, a_path):
use relative path and convert " " to %20
"""
return a_path.replace(" ", "%20")

96 changes: 47 additions & 49 deletions zero_to_one_hundred/models/metadata.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,76 @@
import json
from typing import OrderedDict

from zero_to_one_hundred.configs.sb_config_map import SBConfigMap
from zero_to_one_hundred.repository.sb_persist_fs import SBPersistFS
from zero_to_one_hundred.repository.sb_process_fs import SBProcessFS
from zero_to_one_hundred.validator.validator import Validator
from zero_to_one_hundred.views.markdown_renderer import MarkdownRenderer


class Metadata:
class Metadata(MarkdownRenderer):
ONE_HUN_PER_TXT = "100.0%"

DONE_TXT_AS_MD = '<span style="color:green">**DONE**</span>'
WIP_TXT_AS_MD= '<span style="color:yellow">**WIP**</span>'


def __init__(
self,
config_map: SBConfigMap,
persist_fs: SBPersistFS,
process_fs: SBProcessFS,
get_isbn,
http_url: str,
page_curr=0,
pages_tot=0,
):
self.config_map = config_map
self.http_url = http_url
self.persist_fs = persist_fs
self.process_fs = process_fs
self.page_curr = page_curr
self.isbn = get_isbn(http_url)
self.contents_path = persist_fs.abs_path(f"{self.isbn}")
self.page_curr = page_curr
self.pages_tot = pages_tot
self.path_json = f"{self.contents_path}/{self.isbn}.json"
self.metadata : dict = self.read()

def __repr__(self):
return f"Metadata {self.http_url}, {self.isbn} {self.contents_path}"

@property
def get_page_perc(self):
perc = 0
if self.pages_tot > 0:
perc = 100 * self.page_curr / self.pages_tot
return str(round(perc, 1)) + "%"

@staticmethod
def get_page_perc(metadata_dict: dict):
    """Return the reading progress as a percentage string.

    Reads ``page_curr`` and ``page_tot`` from *metadata_dict*; a missing key
    counts as 0. Values may be ints or numeric strings.

    Returns:
        The percentage rounded to one decimal and suffixed with ``%``
        (e.g. ``"9.9%"``), or ``"n/a"`` when the total is not positive or
        either value cannot be converted to an integer.
    """
    try:
        page_curr = int(metadata_dict.get("page_curr", "0"))
        pages_tot = int(metadata_dict.get("page_tot", "0"))
    except (TypeError, ValueError):
        # hand-edited metadata may hold junk; report it as "no valid values"
        return "n/a"
    if pages_tot <= 0:
        return "n/a"
    return str(round(100 * page_curr / pages_tot, 1)) + "%"

def write(self):
self.write_json()

def write_json(self):
    """Persist the refreshed metadata to ``self.path_json`` as indented JSON.

    ``get_metadata()`` already returns a compact JSON string, so it is
    parsed and re-serialised with ``json.dumps`` to obtain the indented
    form. ``json.dump`` is not usable here: it writes to a file object
    and raises ``TypeError`` when none is given.
    """
    txt = json.dumps(json.loads(self.get_metadata()), indent=4)
    self.persist_fs.write_json(self.path_json, txt)

def read(self):
    """Load the metadata stored at ``self.path_json``.

    ``persist_fs.read_file`` may return ``None``; the content it does return
    is joined with ``"".join`` before parsing, so a string or a list of
    lines both work.

    Returns:
        The parsed JSON content, or an empty dict when there is nothing to
        parse (``None``, empty or whitespace-only content).
    """
    json_data = self.persist_fs.read_file(self.path_json)
    raw = "" if json_data is None else "".join(json_data)
    if not raw.strip():
        # json.loads("") raises; an empty file simply means "no metadata yet"
        return {}
    return json.loads(raw)

@property
def status(self):
    """Return the DONE badge when the book is fully read, else the WIP badge.

    The percentage is recomputed from ``self.metadata`` rather than looked
    up with ``in``: ``self.metadata`` is a dict, so a membership test only
    inspects its keys and could never match the ``"100.0%"`` value.
    """
    is_done = self.get_page_perc(self.metadata) == self.ONE_HUN_PER_TXT
    return self.DONE_TXT_AS_MD if is_done else self.WIP_TXT_AS_MD


def get_metadata(self) -> str:
    """Refresh the derived fields and return the metadata as a JSON string.

    Sets ``isbn``, ``url`` and ``pages_perc`` on ``self.metadata`` (the dict
    is updated in place), then serialises it with the top-level keys in
    sorted order so the output is stable and easier to read.
    """
    metadata_dict = self.metadata
    metadata_dict["isbn"] = self.isbn
    metadata_dict["url"] = self.http_url
    metadata_dict["pages_perc"] = self.get_page_perc(metadata_dict)
    # a plain dict keeps insertion order, so no OrderedDict is needed
    return json.dumps(dict(sorted(metadata_dict.items())))

def asMarkDown(self) -> str:
return self.get_metadata()
17 changes: 4 additions & 13 deletions zero_to_one_hundred/models/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,27 +46,18 @@ def build_from_dirs(
def asMarkDown(self):
def flatten_meta_book(meta_book: MetaBook):
print(f"flatten_meta_book {meta_book}")
json = meta_book.read_json().replace(
"\n", "<br/>"
) # trick to have LF in MD tables :P
print(json)
status = (
'<span style="color:green">**DONE**</span>'
if "100.0%" in json
else '<span style="color:yellow">**WIP**</span>'
)
res = "|".join(
txt = "|".join(
[
f'<span style="color:blue">**{meta_book.isbn}**</span>',
f"![`img`]({meta_book.path_img_as_md})",
f"[`epub`]({meta_book.path_epub_as_md})",
f"[`pdf`]({meta_book.path_pdf_as_md})",
f"{json}",
f"{status}",
f"{meta_book.metadata.asMarkDown()}",
f"{meta_book.metadata.status}",
]
)

return "|" + res + "|" + " "
return "|" + txt + "|"

flattened_meta_book = [flatten_meta_book(mb) for mb in self.meta_books]
backslash_n_char = "\n"
Expand Down
2 changes: 1 addition & 1 deletion zero_to_one_hundred/tests_sb/resources/map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ type: safari-books-map
configs:
download_engine_path: "safaribooks/safaribooks.py"
download_engine_books_path: "safaribooks/Books"
download_books: true
download_books: false
oreilly_username: "username"
oreilly_userpassword: "userpassword"
split_pdf_pages: 100
1 change: 1 addition & 0 deletions zero_to_one_hundred/tests_sb/test_meta_book.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ def test_is_valid_ebook_path():
dirs = ["0123456789", "1234567890123", "books", "ABC"]
actual = [d for d in dirs if MetaBook.is_valid_ebook_path(d)]
assert actual == ["1234567890123"]

77 changes: 72 additions & 5 deletions zero_to_one_hundred/tests_sb/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from zero_to_one_hundred.models.meta_book import MetaBook
from zero_to_one_hundred.models.metadata import Metadata
from zero_to_one_hundred.tests.conftest import str_relaxed


def test_init(get_config_map, persist_fs, process_fs, http_url, isbn):
Expand All @@ -12,18 +14,83 @@ def test_init(get_config_map, persist_fs, process_fs, http_url, isbn):
)
assert str(actual.isbn).endswith(isbn)
assert str(actual.http_url) == http_url
assert actual.pages_tot == 0
assert actual.page_curr == 0



def test_get_page_perc(get_config_map, persist_fs, process_fs, http_url):
actual = Metadata.get_page_perc(
{
'page_curr' : 99,
'page_tot': 999
}
)
assert actual == "9.9%"

actual = Metadata.get_page_perc(

{
'page_curr' : 0,
'page_tot': 999
}
)
assert actual == "0.0%"
actual = Metadata.get_page_perc(
{
'page_curr' : 1,
'page_tot': 0
}
)
assert actual == "n/a"

def test_asMarkDown(get_config_map, persist_fs, process_fs, http_url, isbn):
actual = Metadata(
get_config_map,
persist_fs,
process_fs,
MetaBook.get_isbn,
http_url,
99,
999,
)
assert actual.get_page_perc == "9.9%"

assert str_relaxed(actual.asMarkDown()) == str_relaxed('''
{
"isbn":"9780135956977",
"pages_perc":"n/a",
"url":"https://learning.oreilly.com/library/view/the-pragmatic-programmer/9780135956977/"
}
''')
# some rand values from json

data = '{ "abc": "123", "def": "456"}'
actual.metadata= json.loads(data)

assert str_relaxed(actual.asMarkDown()) == str_relaxed("""
{
"abc": "123", "def": "456",
"isbn":"9780135956977",
"pages_perc":"n/a",
"url":"https://learning.oreilly.com/library/view/the-pragmatic-programmer/9780135956977/"
}
""")

# calculate pages
data = ''' {
"abc": "123",
"page_curr": 10,
"page_tot": 100
}
'''
actual.metadata= json.loads(data)
print(actual.asMarkDown())
assert str_relaxed(actual.asMarkDown()) == str_relaxed("""
{
"abc": "123",
"isbn":"9780135956977",
"page_curr": 10,
"page_tot": 100,
"pages_perc":"10.0%",
"url":"https://learning.oreilly.com/library/view/the-pragmatic-programmer/9780135956977/"
}
""")



2 changes: 1 addition & 1 deletion zero_to_one_hundred/tests_sb/test_sb_config_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_provide__pass(get_config_map: SBConfigMap):
assert actual.get_oreilly_userpassword is not None
assert actual.get_oreilly_userpassword is not None
assert actual.get_split_pdf_pages == 100
assert actual.get_download_books is True
assert actual.get_download_books is not None


def test__repr__(get_config_map: SBConfigMap, get_map_yaml_path: str):
Expand Down
6 changes: 6 additions & 0 deletions zero_to_one_hundred/views/markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ class MarkdownRenderer(ABC):
@abstractmethod
def asMarkDown(self) -> str:
pass

@staticmethod
def text_lf_as_br(txt):
    """Swap every newline in *txt* for an HTML ``<br/>`` tag.

    Markdown table cells cannot contain raw line feeds, so multi-line text
    is kept readable by turning each ``\\n`` into ``<br/>``.
    """
    line_break = "<br/>"
    return line_break.join(txt.split("\n"))

0 comments on commit c291174

Please sign in to comment.