Commit

minor repush
sushil-rgb committed Apr 30, 2023
1 parent fdee148 commit 69af028
Showing 2 changed files with 8 additions and 8 deletions.
main.py (4 changes: 2 additions & 2 deletions)
@@ -12,10 +12,10 @@ async def main():
     amazon = Amazon()

     # Define the URL to scrape:
-    userInput = "https://www.amazon.com/s?k=gaming+keyboard&rh=n%3A402051011&dc&ds=v1%3AmQ2bbJkh8OLoIWrFEACV3bSUJZPf%2FZg2CsMgtTXggLk&pd_rd_r=f0581525-9ff7-456e-b6df-0dadac916753&pd_rd_w=ebVVQ&pd_rd_wg=LvfFA&pf_rd_p=12129333-2117-4490-9c17-6d31baf0582a&pf_rd_r=K1S9Y5GB9QVBF7VWCAKM&qid=1682867151&rnid=2941120011&ref=sr_nr_n_1"
+    userInput = "https://www.amazon.com/s?k=health+and+beauty&i=beauty-intl-ship&bbn=16225006011&rh=n%3A11062741&dc&ds=v1%3AaTUGn90NLjQvoihGF3%2FqZ1jr%2FIFcsvhBnS3xK%2FaJ3u0&crid=2036DM6EKNYNA&pd_rd_r=fa4603d4-0acc-4de5-a94e-3f047374ec2e&pd_rd_w=LUiIR&pd_rd_wg=yiJls&pf_rd_p=c9097eb6-837b-4ba7-94d7-51428f6e8d2a&pf_rd_r=6W2WTX74X54Y6G5DMXQQ&qid=1682875043&rnid=16225006011&sprefix=health+and+beauty%2Cbeauty-intl-ship%2C173&ref=sr_nr_n_6"

     # Split the pagination into URLs
-    split_links = await amazon.split_url(userInput)
+    split_links = await amazon.split_url(userInput)

     # Define the time interval between scraping requests
     time_interval = 3
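
Note: for context, the snippet below is a minimal sketch of how main() presumably wires these pieces together. The import path, the final amazonMe call, and the handling of its result are assumptions inferred from the amazonMe(self, interval, urls) signature shown in scrapers/scraper.py below; only userInput, split_url, and time_interval actually appear in this hunk.

import asyncio
from scrapers.scraper import Amazon  # assumed import path, based on the file location in this repo

async def main():
    amazon = Amazon()
    # Placeholder for the Amazon search-results URL shown in the diff above
    userInput = "https://www.amazon.com/s?k=..."
    # Split the pagination into one URL per results page
    split_links = await amazon.split_url(userInput)
    # Maximum number of seconds to wait between scraping requests
    time_interval = 3
    # Assumed call: scrape every page with randomized pacing
    # (any return value handling is omitted here)
    await amazon.amazonMe(time_interval, split_links)

if __name__ == "__main__":
    asyncio.run(main())
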
scrapers/scraper.py (12 changes: 6 additions & 6 deletions)
@@ -175,17 +175,17 @@ async def amazonMe(self, interval, urls):

        # Loop through all the URLs and scrape data from each page:
        for pages in range(len(urls)):
-            print("\n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")
+            # print("\n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")
            print(f"Scraping pages || {pages + 1}")
            print("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")

-            # Wait for random interval before making next request
-            await asyncio.sleep(randomTime(interval))

            # Get content and soup from current URL:
            content = await self.static_connection(urls[pages])
            soup = BeautifulSoup(content, 'lxml')

+            # Wait for random interval before making next request
+            await asyncio.sleep(randomTime(interval))

            # Get product card contents from current page:
            card_contents = soup.select(self.scrape['main_content'])
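
Note: the pacing line moved above depends on a randomTime helper that is not part of this diff. A minimal sketch of what it might look like, assuming it simply turns the configured interval into a random delay in seconds (the real implementation in this repo may differ):

import random

def randomTime(interval):
    # Assumed behaviour: pick a random whole number of seconds
    # between 1 and the configured maximum, so successive requests
    # are not evenly spaced.
    return random.randint(1, int(interval))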

@@ -200,7 +200,7 @@ async def amazonMe(self, interval, urls):
                    'Price': await self.catch.text(datas.select_one(self.scrape['price'])),
                    'Original price': await self.catch.text(datas.select_one(self.scrape['old_price'])),
                    'Review': await self.catch.text(datas.select_one(self.scrape['review'])),
-                    'Review count': await self.catch.text(soup.select_one(self.scrape['review_count'])),
+                    'Review count': await self.catch.text(datas.select_one(self.scrape['review_count'])),
                    'Hyperlink': prod_hyperlink,
                    'Image url': f"""{await self.catch.attributes(datas.select_one(self.scrape['image']), 'src')}""",
                }
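
Note: the one-line change above scopes the review-count lookup to the current product card (datas) instead of the whole page (soup); with soup.select_one, every scraped row would repeat whichever element matches first on the page. A small self-contained illustration of the difference, using made-up markup and class names rather than the project's real selectors:

from bs4 import BeautifulSoup

html = """
<div class="card"><span class="count">120</span></div>
<div class="card"><span class="count">7</span></div>
"""
soup = BeautifulSoup(html, "lxml")  # same parser the scraper uses

for card in soup.select("div.card"):
    # Page-level lookup: always the first match on the page -> "120", "120"
    print(soup.select_one("span.count").text)
    # Card-level lookup: the value belonging to this card -> "120", "7"
    print(card.select_one("span.count").text)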
