Skip to content

Commit

Permalink
Fix the update-reviews pipeline
Browse files Browse the repository at this point in the history
Headless mode does not work with Glassdoor anymore.
  • Loading branch information
pogopaule committed Mar 9, 2024
1 parent efd38e3 commit 93f2627
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 52 deletions.
42 changes: 0 additions & 42 deletions .github/workflows/ratings.yml

This file was deleted.

2 changes: 1 addition & 1 deletion devenv.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
packages = [
pkgs.chromedriver
pkgs.chromium
# pkgs.chromium
];

languages.python = {
Expand Down
24 changes: 15 additions & 9 deletions src/update_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ def dump_data(data):


chrome_options = Options()
chrome_options.add_argument("--headless")
binary_location = "/nix/store/5hmqjx40frw4cf3gm2zz66s6hzrr0pjc-chromium-106.0.5249.61/bin/chromium"
# chrome_options.add_argument("--headless")
binary_location = (
"/nix/store/5hmqjx40frw4cf3gm2zz66s6hzrr0pjc-chromium-106.0.5249.61/bin/chromium"
)
chrome_options.binary_location = binary_location
driver = webdriver.Chrome(options=chrome_options)

Expand All @@ -45,7 +47,6 @@ def dump_data(data):

for index, job in enumerate(jobs):
url = job["review"]
job["rating"] = None
if url:
print(url + ": ", end="", flush=True)

Expand All @@ -56,23 +57,28 @@ def dump_data(data):
try:
offset = 56
rating = float(
content[index + offset: index +
offset + 3].replace(",", ".")
content[index + offset : index + offset + 3].replace(
",", "."
)
)
print(rating)
job["rating"] = rating
except ValueError as error:
print(error)

if url.startswith("https://www.glassdoor"):
try:
driver.get(url)
rating = driver.find_element(
By.CLASS_NAME, "v2__EIReviewsRatingsStylesV2__ratingNum").text
By.CLASS_NAME,
"rating-headline-average__rating-headline-average-module__rating",
).text
print(rating)
job["rating"] = rating
except Exception as e:
print("error getting rating", e)

print(rating)
job["rating"] = rating
time.sleep(random.randint(1, 4))
time.sleep(random.randint(10, 20))
dump_data(data)

except yaml.YAMLError as error:
Expand Down

0 comments on commit 93f2627

Please sign in to comment.