Commit

minor repush
sushil-rgb committed Apr 30, 2023
1 parent fdee148 commit 69af028
Showing 2 changed files with 8 additions and 8 deletions.
main.py (4 changes: 2 additions & 2 deletions)
@@ -12,10 +12,10 @@ async def main():
     amazon = Amazon()

     # Define the URL to scrape:
-    userInput = "https://www.amazon.com/s?k=gaming+keyboard&rh=n%3A402051011&dc&ds=v1%3AmQ2bbJkh8OLoIWrFEACV3bSUJZPf%2FZg2CsMgtTXggLk&pd_rd_r=f0581525-9ff7-456e-b6df-0dadac916753&pd_rd_w=ebVVQ&pd_rd_wg=LvfFA&pf_rd_p=12129333-2117-4490-9c17-6d31baf0582a&pf_rd_r=K1S9Y5GB9QVBF7VWCAKM&qid=1682867151&rnid=2941120011&ref=sr_nr_n_1"
+    userInput = "https://www.amazon.com/s?k=health+and+beauty&i=beauty-intl-ship&bbn=16225006011&rh=n%3A11062741&dc&ds=v1%3AaTUGn90NLjQvoihGF3%2FqZ1jr%2FIFcsvhBnS3xK%2FaJ3u0&crid=2036DM6EKNYNA&pd_rd_r=fa4603d4-0acc-4de5-a94e-3f047374ec2e&pd_rd_w=LUiIR&pd_rd_wg=yiJls&pf_rd_p=c9097eb6-837b-4ba7-94d7-51428f6e8d2a&pf_rd_r=6W2WTX74X54Y6G5DMXQQ&qid=1682875043&rnid=16225006011&sprefix=health+and+beauty%2Cbeauty-intl-ship%2C173&ref=sr_nr_n_6"

     # Split the pagination into URLs
-    split_links = await amazon.split_url(userInput)
+    split_links = await amazon.split_url(userInput)

     # Define the time interval between scraping requests
     time_interval = 3
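
Note: for context, the snippet below is a minimal sketch of how main() presumably wires these pieces together. The import path, the final amazonMe call, and the handling of its result are assumptions inferred from the amazonMe(self, interval, urls) signature shown in scrapers/scraper.py below; only userInput, split_url, and time_interval actually appear in this hunk.

import asyncio
from scrapers.scraper import Amazon  # assumed import path, based on the file location in this repo

async def main():
    amazon = Amazon()
    # Placeholder for the Amazon search-results URL shown in the diff above
    userInput = "https://www.amazon.com/s?k=..."
    # Split the pagination into one URL per results page
    split_links = await amazon.split_url(userInput)
    # Maximum number of seconds to wait between scraping requests
    time_interval = 3
    # Assumed call: scrape every page with randomized pacing
    # (any return value handling is omitted here)
    await amazon.amazonMe(time_interval, split_links)

if __name__ == "__main__":
    asyncio.run(main())
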
scrapers/scraper.py (12 changes: 6 additions & 6 deletions)
@@ -175,17 +175,17 @@ async def amazonMe(self, interval, urls):

        # Loop through all the URLs and scrape data from each page:
        for pages in range(len(urls)):
-            print("\n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")
+            # print("\n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")
            print(f"Scraping pages || {pages + 1}")
            print("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------")

-            # Wait for random interval before making next request
-            await asyncio.sleep(randomTime(interval))

            # Get content and soup from current URL:
            content = await self.static_connection(urls[pages])
            soup = BeautifulSoup(content, 'lxml')

+            # Wait for random interval before making next request
+            await asyncio.sleep(randomTime(interval))

            # Get product card contents from current page:
            card_contents = soup.select(self.scrape['main_content'])
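
Note: the pacing line moved above depends on a randomTime helper that is not part of this diff. A minimal sketch of what it might look like, assuming it simply turns the configured interval into a random delay in seconds (the real implementation in this repo may differ):

import random

def randomTime(interval):
    # Assumed behaviour: pick a random whole number of seconds
    # between 1 and the configured maximum, so successive requests
    # are not evenly spaced.
    return random.randint(1, int(interval))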

@@ -200,7 +200,7 @@ async def amazonMe(self, interval, urls):
                    'Price': await self.catch.text(datas.select_one(self.scrape['price'])),
                    'Original price': await self.catch.text(datas.select_one(self.scrape['old_price'])),
                    'Review': await self.catch.text(datas.select_one(self.scrape['review'])),
-                    'Review count': await self.catch.text(soup.select_one(self.scrape['review_count'])),
+                    'Review count': await self.catch.text(datas.select_one(self.scrape['review_count'])),
                    'Hyperlink': prod_hyperlink,
                    'Image url': f"""{await self.catch.attributes(datas.select_one(self.scrape['image']), 'src')}""",
                }
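
Note: the one-line change above scopes the review-count lookup to the current product card (datas) instead of the whole page (soup); with soup.select_one, every scraped row would repeat whichever element matches first on the page. A small self-contained illustration of the difference, using made-up markup and class names rather than the project's real selectors:

from bs4 import BeautifulSoup

html = """
<div class="card"><span class="count">120</span></div>
<div class="card"><span class="count">7</span></div>
"""
soup = BeautifulSoup(html, "lxml")  # same parser the scraper uses

for card in soup.select("div.card"):
    # Page-level lookup: always the first match on the page -> "120", "120"
    print(soup.select_one("span.count").text)
    # Card-level lookup: the value belonging to this card -> "120", "7"
    print(card.select_one("span.count").text)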
