From 2586aacc9837219fc3e36adae7b9ddc9581f54ff Mon Sep 17 00:00:00 2001
From: Mark King
Date: Mon, 1 Jul 2024 11:43:50 +0100
Subject: [PATCH] PoorlyDrawnLines: Fix after site redesign

Switch the module to HTTPS and read comic images from the data-src
attribute of images inside the "entry-content" div. Add a stripUrl
template, allow multiple images per strip, and use 'hardly-essayists'
as the first strip.

Skip four pages that carry no comic, and hard-code the previous-strip
URL for four numeric pages (8198, 8186, 8177, 2056) so that missing
comics, which redirect back to the home page, are stepped over.

Keep two blank lines around the top-level class definitions (PEP 8).
---
 dosagelib/plugins/p.py | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py
index 0a2cf00373..ce48c07ed8 100644
--- a/dosagelib/plugins/p.py
+++ b/dosagelib/plugins/p.py
@@ -228,13 +228,38 @@ def getPrevUrl(self, url, data):
         prefix = url.rsplit('/', 1)[0]
         return "%s/index%d.html" % (prefix, num)
 
 
 class PoorlyDrawnLines(_ParserScraper):
-    url = 'http://poorlydrawnlines.com/comic/'
-    firstStripUrl = url + 'campus-characters/'
-    imageSearch = '//div[d:class("comic")]//img'
+    url = 'https://poorlydrawnlines.com/comic/'
+    stripUrl = url + '%s/'
+    multipleImagesPerStrip = True
+    firstStripUrl = stripUrl % 'hardly-essayists'
+    imageSearch = '//div[d:class("entry-content")]//img[@data-src]/@data-src'
     prevSearch = '//a[@rel="prev"]'
 
+    def shouldSkipUrl(self, url, _data):
+        """Skip pages without a comic."""
+        skipUrls = [self.stripUrl % s for s in (
+            'hope-it-all-works-out-new-book-coming-this-fall',
+            'poorly-drawn-lines-animated-series',
+            'poorly-drawn-lines-episode-two',
+            'watch-poorly-drawn-lines-on-hulu',
+        )]
+        return url in skipUrls
+
+    def getPrevUrl(self, url: str, data) -> str | None:
+        """Skip missing comics which redirect back to home page"""
+        if url == self.stripUrl % '8198':
+            return self.stripUrl % 'excited-2'
+        elif url == self.stripUrl % '8186':
+            return self.stripUrl % 'to-hell-2'
+        elif url == self.stripUrl % '8177':
+            return self.stripUrl % 'feel-real'
+        elif url == self.stripUrl % '2056':
+            return self.stripUrl % 'stereotype'
+
+        return super().getPrevUrl(url, data)
+
 
 class PoppyOPossum(WordPressScraper):
     baseUrl = 'https://www.poppy-opossum.com/'