Skip to content

Commit

Permalink
Merge branch 'master' into data-errors
Browse files Browse the repository at this point in the history
  • Loading branch information
rwalek668 authored Aug 20, 2021
2 parents 8e5f443 + 18f607d commit 683ba5a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 18 deletions.
7 changes: 4 additions & 3 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ RUN apk update && apk add build-base autoconf automake libtool pkgconfig nasm
# Add the package.json file and build the node_modules folder
WORKDIR /app
COPY ./package*.json ./
RUN apk add --no-cache --virtual .gyp \
python
RUN mkdir node_modules && yarn install

RUN mkdir node_modules
RUN apk update && apk add yarn python g++ make && rm -rf /var/cache/apk/*
RUN yarn install

# Get a clean image with gatsby-cli and the pre-built node modules
FROM node:12-alpine
Expand Down
34 changes: 24 additions & 10 deletions data_pipeline/scraper/scraper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import time

from time import sleep

from selenium import webdriver
Expand Down Expand Up @@ -84,9 +85,9 @@ def verifySearchTableLoadComplete(self, driver):
def verifyDownloadFormTableLoadComplete(self, driver):
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, self.FORM_TABLE_MAIN_TABLE_ID))
)
)

def downloadExcel(self, driver):
def downloadExcel(self, driver, countFile):
# Finds all the Excel files linked on the page and downloads them.
# First create array that handles amendments, to ensure we're only downloading the latest/most accurate
numFormTableRows = driver.find_elements_by_xpath(
Expand Down Expand Up @@ -118,8 +119,19 @@ def downloadExcel(self, driver):
else:
downloadLinkElement.click()
count += 1

while(1):
if os.path.exists('./data/transactionExportGrid.xls'):
countFile += 1
renamedFile = './data/transactionExportGrid' + '(' + str(countFile) + ').xls'
os.rename('./data/transactionExportGrid.xls', renamedFile)
break
sleep(0.1)


print('NUM DOWNLOADS {}'.format(count))
self.preprocessing.insertColumns(count, self.CANDIDATENAME, self.ELECTIONDATE, self.BALLOTITEM)
return countFile

# Returns a boolean.
def errorDialogExists(self, driver):
Expand Down Expand Up @@ -229,14 +241,12 @@ def __init__(self):

options = webdriver.ChromeOptions()

# Uncomment block BELOW for headless data-retrieval
# --> Currently not working 100%, only downloads first link on form table
isHeadless = os.environ.get('HEADLESS', False)
# enable headless data retrieval
isHeadless = os.environ.get('HEADLESS', True)
if isHeadless:
options.add_argument("--headless")
# options.add_argument("--disable-gpu")
# options.add_argument("--window-size=1280,800")
# Uncomment block ABOVE for headless data-retrieval
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1280,800")

options.add_argument("--ignore-certificate-errors")
options.add_argument("--test_type")
Expand All @@ -256,11 +266,14 @@ def __init__(self):
options.add_experimental_option("prefs", prefs)
self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)


def scrape(self, election_cycle=None):
# Navigate to https://www.southtechhosting.com/SanJoseCity/CampaignDocsWebRetrieval/Search/SearchByElection.aspx
self.website.navigateToSearchPage(self.driver, self.SEARCH_FORM_ADDRESS, election_cycle=election_cycle)
self.website.verifySearchTableLoadComplete(self.driver)

countFile = 0

for search_page_num in range(1, self.website.numPages(self.driver) + 1):
print('PAGE {}'.format(search_page_num))
# Need to navigate to the page upfront so that when we get the number of entries on the page it is accurate.
Expand All @@ -283,7 +296,7 @@ def scrape(self, election_cycle=None):
else:
# If there are forms, then we will be brought to the "forms" page.
self.website.verifyDownloadFormTableLoadComplete(self.driver)
self.website.downloadExcel(self.driver)
countFile = self.website.downloadExcel(self.driver, countFile)

self.website.clickBackButton(self.driver)
self.website.verifySearchTableLoadComplete(self.driver)
Expand All @@ -293,7 +306,7 @@ def scrape(self, election_cycle=None):

# Custom module to aggregate data into single CSV
self.website.preprocessing.aggregateData()

"""
start_time = time.time()
s = Scraper()
Expand All @@ -305,3 +318,4 @@ def scrape(self, election_cycle=None):
time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))
)
)
"""
9 changes: 4 additions & 5 deletions src/pages/aboutUs.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,22 @@ export default function AboutUs() {
const currentTeam = sortTeamByAlphabeticalOrder([
{
name: "Alex P",
position: "Frontend / Co-lead",
position: "Frontend",
github: "alessandro-pianetta",
image: Alex,
lead: true,
},
{
name: "Geraldine E",
position: "Backend",
github: "geleazar1000111",
image: Geraldine,
},
{ name: "Ryan W", position: "Backend", image: Ryan },
{ name: "Ryan W", position: "Backend / Co-lead", image: Ryan, lead: true },
{ name: "Darren P", position: "Backend / Co-lead", lead: true },
{ name: "Emily J", position: "Frontend" },
{ name: "Mark N", position: "Frontend" },
{ name: "Coco M", position: "Backend" },
{ name: "Diane L", position: "UX & Design" },
{ name: "Irina R", position: "UX & Design" },
{ name: "Yan-Yin C", position: "Frontend"}
])
const alumni = sortTeamByAlphabeticalOrder([
{ name: "Helen", position: "Project Lead", lead: true },
Expand All @@ -57,6 +54,8 @@ export default function AboutUs() {
{ name: "Lynna J", position: "Fullstack" },
{ name: "Gajan N", position: "Fullstack" },
{ name: "Nicole", position: "Fullstack" },
{ name: "Emily J", position: "Frontend" },
{ name: "Yan-Yin C", position: "Frontend" },
])

return (
Expand Down

0 comments on commit 683ba5a

Please sign in to comment.