Skip to content

Commit

Permalink
PR: fix bill title
Browse files: browse the repository at this point in the history
  • Loading branch information
jessemortenson committed Jan 14, 2025
1 parent 0ea7886 commit 781b861
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions scrapers/pr/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,15 +333,20 @@ def scrape_bill(self, chamber, session, url):
}
html = self.s.get(url, headers=headers, verify=False).text
page = lxml.html.fromstring(html)
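# NOTE: the 'tulo' substring workaround (once used because the accented i in "Título" reportedly tripped up lxml) is not what the lookups below do: the bill identifier is read from the page <h1> header text, and the title from the span following the "Título:" label.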
title = page.xpath('//main//div[contains(@class, "items-center")]/h1/text()')[
0
].strip()
if title:
bill_id = re.findall(r"[A-Z]{2}\d{4}", title)[0]

page_header_elems = page.xpath('//main//div[contains(@class, "items-center")]/h1/text()')
if len(page_header_elems) > 0:
page_header_text = page_header_elems[0].strip()
bill_id = re.findall(r"[A-Z]{2}\d{4}", page_header_text)[0]
else:
self.logger.error(f"Bill found with no bill identifier at {url}")

bill_title_elems = page.xpath('//span/strong[text()="Título:"]/../following-sibling::span')
if len(bill_title_elems) > 0:
title = bill_title_elems[0].text_content().strip()
else:
self.logger.error(f"Bill found with no title at {url}")

# PR occasionally repeats a bill at different URLs (????)
# example:
# PC0205 https://sutra.oslpr.org/medidas/152982
Expand Down

0 comments on commit 781b861

Please sign in to comment.