Skip to content

Commit

Permalink
don't dl subscriber-only articles [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
holyspiritomb committed Oct 2, 2023
1 parent 67f551a commit 1f0cb1c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
10 changes: 10 additions & 0 deletions recipes_custom/aiweirdness.recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,13 @@ def populate_article_metadata(self, article, __, _):
if (not self.pub_date) or article.utctime > self.pub_date:
self.pub_date = article.utctime
self.title = format_title(_name, article.utctime)

def parse_feeds(self):
    """Parse the feeds, then discard every article that carries no content.

    Each article is logged as it is examined. An article whose ``content``
    is falsy is treated as subscriber-only: a warning is logged and the
    article is removed from its feed.

    Returns the list of feeds produced by ``BasicNewsRecipe.parse_feeds``,
    with those articles removed.
    """
    parsed = BasicNewsRecipe.parse_feeds(self)
    for section in parsed:
        # Walk a snapshot so entries can be removed from the live list.
        for entry in list(section.articles):
            self.log(entry)
            if entry.content:
                continue
            self.log.warn(f"removing subscriber-only article {entry.title} from feed")
            section.articles.remove(entry)
    return parsed
16 changes: 13 additions & 3 deletions recipes_custom/life-is-a-sacred-text.recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from recipes_shared import BasicNewsrackRecipe, format_title
from calibre.utils.date import utcnow, parse_date
from calibre.web.feeds import Feed
from calibre.ebooks.BeautifulSoup import BeautifulSoup

_name = "Life is a Sacred Text"

Expand Down Expand Up @@ -59,10 +60,19 @@ def populate_article_metadata(self, article, soup, _):
desc_el.string = article.summary
# article_img = soup.find("img")

def parse_feeds(self):
    """Parse the feeds and drop articles whose first paragraph is a teaser.

    The feed content of each article is parsed and its first ``<p>`` is
    inspected. If that paragraph's text contains "Read more", the article is
    treated as subscriber-only: a warning is logged and the article is
    removed from its feed.

    Articles with no feed content, or with no ``<p>`` in their content, are
    left in the feed untouched; there is nothing to inspect, so no parser or
    string helper is handed an empty value.

    Returns the list of feeds produced by ``BasicNewsRecipe.parse_feeds``,
    with the teaser-only articles removed.
    """
    feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in feeds:
        # Iterate over a copy: entries are removed from the live list below.
        for article in feed.articles[:]:
            if not article.content:
                # No feed body to examine; do not pass None/"" to the parser.
                continue
            first_para = BeautifulSoup(article.content).find("p")
            if first_para is None:
                # Content without any paragraph cannot be a "Read more" teaser.
                continue
            if "Read more" in self.tag_to_string(first_para):
                self.log.warn(f"removing subscriber-only article {article.title} from feed")
                feed.articles.remove(article)
    return feeds

def preprocess_html(self, soup):
paras = soup.find_all("p")
if paras and len(paras) < 4:
self.abort_article("Subscription required")
headline = soup.find("h2")
a_date = soup.new_tag("div")
a_desc = soup.new_tag("h3")
Expand Down

0 comments on commit 1f0cb1c

Please sign in to comment.