Skip to content

Commit

Permalink
172 migrating from setup.py to pyproject.toml (#173)
Browse files Browse the repository at this point in the history
* - Moved all project settings to pyproject.toml
- Added a minimal setup.cfg for flake8
- Changed setup.py to a minimal file for backwards compatibility
- Added in flake8 and isort for more code consistency
- Added pre-commit hooks for flake8 and isort

* Updated readme file

* Updated files and accounted for flake8 and isort issues
- Updated files with isort for consistent imports
- Fixed all flake8 issues
- Changed imports to ensure they are accepted by flake8, and fixed
  circular dependencies

* Removed placeholder emails from pyproject.toml
  • Loading branch information
sammytheindi authored Sep 29, 2024
1 parent 72faea2 commit 493af1b
Show file tree
Hide file tree
Showing 28 changed files with 337 additions and 121 deletions.
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,12 @@ repos:
rev: "v3.1.0" # Specify Prettier version
hooks:
- id: prettier
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
3 changes: 2 additions & 1 deletion jobfunnel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
"""
import os
import sys

from .backend.jobfunnel import JobFunnel
from .config import parse_cli, build_config_dict, get_config_manager
from .config import build_config_dict, get_config_manager, parse_cli


def main():
Expand Down
2 changes: 2 additions & 0 deletions jobfunnel/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from jobfunnel.backend.job import Job, JobStatus

__all__ = ["Job", "JobStatus"]
8 changes: 4 additions & 4 deletions jobfunnel/backend/jobfunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,27 @@
"""

import csv
from datetime import date, datetime, timedelta
import json
import os
import pickle
from datetime import date, datetime, timedelta
from time import time
from typing import Dict, List
from typing import Dict

from requests import Session

from jobfunnel import __version__
from jobfunnel.backend import Job
from jobfunnel.backend.tools import Logger
from jobfunnel.backend.tools.filters import DuplicatedJob, JobFilter
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.config import JobFunnelConfigManager
from jobfunnel.resources import (
CSV_HEADER,
T_NOW,
Remoteness,
DuplicateType,
JobStatus,
Locale,
Remoteness,
)


Expand Down
2 changes: 1 addition & 1 deletion jobfunnel/backend/scrapers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
Paul McInnis 2020
"""

import random
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import Lock, Manager
import random
from time import sleep
from typing import Any, Dict, List, Optional

Expand Down
5 changes: 2 additions & 3 deletions jobfunnel/backend/scrapers/glassdoor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
FIXME: this is currently unable to get past page 1 of job results.
"""

import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, wait
from math import ceil
import re
from typing import Any, Dict, List, Tuple, Union

from bs4 import BeautifulSoup
Expand All @@ -15,10 +15,9 @@
from jobfunnel.backend.scrapers.base import (
BaseCANEngScraper,
BaseScraper,
BaseUSAEngScraper,
BaseUKEngScraper,
BaseUSAEngScraper,
)
from jobfunnel.backend.tools import get_webdriver
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
from jobfunnel.resources import MAX_CPU_WORKERS, JobField
Expand Down
14 changes: 7 additions & 7 deletions jobfunnel/backend/scrapers/indeed.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
"""Scraper designed to get jobs from www.indeed.X
"""

import re
from concurrent.futures import ThreadPoolExecutor, wait
import json
from math import ceil
import random
import re
from typing import Any, Dict, List, Optional
from unicodedata import normalize
import json
import random

from bs4 import BeautifulSoup
from requests import Session

from jobfunnel.backend import Job
from jobfunnel.backend.scrapers.base import (
BaseCANEngScraper,
BaseDEGerScraper,
BaseFRFreScraper,
BaseScraper,
BaseUSAEngScraper,
BaseUKEngScraper,
BaseFRFreScraper,
BaseDEGerScraper,
BaseUSAEngScraper,
)
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
from jobfunnel.resources import (
MAX_CPU_WORKERS,
USER_AGENT_LIST_MOBILE,
JobField,
Remoteness,
USER_AGENT_LIST_MOBILE,
)

# pylint: disable=using-constant-test,unused-import
Expand Down
6 changes: 3 additions & 3 deletions jobfunnel/backend/scrapers/monster.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Scrapers for www.monster.X
"""

import re
from abc import abstractmethod
from math import ceil
import re
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup
Expand All @@ -12,10 +12,10 @@
from jobfunnel.backend import Job
from jobfunnel.backend.scrapers.base import (
BaseCANEngScraper,
BaseFRFreScraper,
BaseScraper,
BaseUSAEngScraper,
BaseUKEngScraper,
BaseFRFreScraper,
BaseUSAEngScraper,
)
from jobfunnel.backend.tools.filters import JobFilter
from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str
Expand Down
23 changes: 11 additions & 12 deletions jobfunnel/backend/scrapers/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,25 @@
TODO: there must be a better way to do this by using class attrib of Provider
"""

from jobfunnel.resources import Locale, Provider

from jobfunnel.backend.scrapers.glassdoor import (
GlassDoorScraperCANEng,
GlassDoorScraperUKEng,
GlassDoorScraperUSAEng,
)
from jobfunnel.backend.scrapers.indeed import (
IndeedScraperCANEng,
IndeedScraperUSAEng,
IndeedScraperUKEng,
IndeedScraperFRFre,
IndeedScraperDEGer,
IndeedScraperFRFre,
IndeedScraperUKEng,
IndeedScraperUSAEng,
)
from jobfunnel.backend.scrapers.monster import (
MonsterScraperCANEng,
MonsterScraperUSAEng,
MonsterScraperUKEng,
MonsterScraperFRFre,
MonsterScraperUKEng,
MonsterScraperUSAEng,
)
from jobfunnel.backend.scrapers.glassdoor import (
GlassDoorScraperCANEng,
GlassDoorScraperUSAEng,
GlassDoorScraperUKEng,
)
from jobfunnel.resources import Locale, Provider

SCRAPER_FROM_LOCALE = {
# search terms which one to use
Expand Down
4 changes: 3 additions & 1 deletion jobfunnel/backend/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from jobfunnel.backend.tools.tools import get_webdriver, get_logger, Logger
from jobfunnel.backend.tools.tools import Logger, get_logger, get_webdriver

__all__ = ["get_webdriver", "get_logger", "Logger"]

# NOTE: we can't import delays here or we cause circular import.
2 changes: 1 addition & 1 deletion jobfunnel/backend/tools/delay.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from numpy import arange
from scipy.special import expit # pylint: disable=no-name-in-module

from jobfunnel.config import DelayConfig
from jobfunnel.config.delay import DelayConfig
from jobfunnel.resources import DelayAlgorithm


Expand Down
2 changes: 1 addition & 1 deletion jobfunnel/backend/tools/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
Paul McInnis 2020
"""

import logging
from collections import namedtuple
from copy import deepcopy
from datetime import datetime
import logging
from typing import Dict, List, Optional, Tuple

import nltk
Expand Down
4 changes: 1 addition & 3 deletions jobfunnel/backend/tools/tools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Assorted tools for all aspects of funnelin' that don't fit elsewhere
"""

from datetime import date, datetime, timedelta
import logging
import re
import sys
from datetime import date, datetime, timedelta
from typing import Optional

from dateutil.relativedelta import relativedelta
Expand All @@ -14,8 +14,6 @@
from webdriver_manager.microsoft import EdgeChromiumDriverManager, IEDriverManager
from webdriver_manager.opera import OperaDriverManager

from jobfunnel.backend import Job

# Initialize list and store regex objects of date quantifiers
HOUR_REGEX = re.compile(r"(\d+)(?:[ +]{1,3})?(?:hour|hr|heure)")
DAY_REGEX = re.compile(r"(\d+)(?:[ +]{1,3})?(?:day|d|jour)")
Expand Down
19 changes: 16 additions & 3 deletions jobfunnel/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
from jobfunnel.config.settings import SettingsValidator, SETTINGS_YAML_SCHEMA
from jobfunnel.config.base import BaseConfig
from jobfunnel.config.cli import build_config_dict, get_config_manager, parse_cli
from jobfunnel.config.delay import DelayConfig
from jobfunnel.config.manager import JobFunnelConfigManager
from jobfunnel.config.proxy import ProxyConfig
from jobfunnel.config.search import SearchConfig
from jobfunnel.config.manager import JobFunnelConfigManager
from jobfunnel.config.cli import parse_cli, get_config_manager, build_config_dict
from jobfunnel.config.settings import SETTINGS_YAML_SCHEMA, SettingsValidator

__all__ = [
"SettingsValidator",
"SETTINGS_YAML_SCHEMA",
"BaseConfig",
"DelayConfig",
"ProxyConfig",
"SearchConfig",
"JobFunnelConfigManager",
"parse_cli",
"get_config_manager",
"build_config_dict",
]
30 changes: 20 additions & 10 deletions jobfunnel/config/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,33 @@
"""

import argparse
from typing import Dict, Any, List
from typing import Any, Dict, List

import yaml

from jobfunnel.config import (
DelayConfig,
JobFunnelConfigManager,
ProxyConfig,
SearchConfig,
SettingsValidator,
)
from jobfunnel.config.delay import DelayConfig
from jobfunnel.config.manager import JobFunnelConfigManager
from jobfunnel.config.proxy import ProxyConfig
from jobfunnel.config.search import SearchConfig
from jobfunnel.config.settings import SettingsValidator
from jobfunnel.resources import (
LOG_LEVEL_NAMES,
DelayAlgorithm,
Locale,
Provider,
Remoteness,
)
from jobfunnel.resources.defaults import *
from jobfunnel.resources.defaults import (
DEFAULT_COMPANY_BLOCK_LIST,
DEFAULT_DELAY_ALGORITHM,
DEFAULT_DELAY_MAX_DURATION,
DEFAULT_DELAY_MIN_DURATION,
DEFAULT_LOG_LEVEL_NAME,
DEFAULT_MAX_LISTING_DAYS,
DEFAULT_PROVIDER_NAMES,
DEFAULT_REMOTENESS,
DEFAULT_SEARCH_RADIUS,
)


def parse_cli(args: List[str]) -> Dict[str, Any]:
Expand Down Expand Up @@ -153,7 +162,7 @@ def parse_cli(args: List[str]) -> Dict[str, Any]:
"-l",
dest="search.locale",
type=str,
choices=[l.name for l in Locale],
choices=[locale.name for locale in Locale],
help="Global location and language to use to scrape the job provider"
" website (i.e. -l CANADA_ENGLISH -p indeed --> indeed.ca).",
required=True,
Expand Down Expand Up @@ -305,6 +314,7 @@ def build_config_dict(args_dict: Dict[str, Any]) -> Dict[str, Any]:
"""Parse the JobFunnel configuration settings and combine CLI, YAML and
defaults to build a valid config dictionary for initializing config objects.
"""

# Build a config that respects CLI, defaults and YAML
# NOTE: we a passed settings YAML first so we can inject CLI after if needed
if "settings_yaml_file" in args_dict:
Expand Down
5 changes: 4 additions & 1 deletion jobfunnel/config/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from typing import List, Optional

from jobfunnel.backend.scrapers.registry import SCRAPER_FROM_LOCALE
from jobfunnel.config import BaseConfig, DelayConfig, ProxyConfig, SearchConfig
from jobfunnel.config.base import BaseConfig
from jobfunnel.config.delay import DelayConfig
from jobfunnel.config.proxy import ProxyConfig
from jobfunnel.config.search import SearchConfig
from jobfunnel.resources import BS4_PARSER

# pylint: disable=using-constant-test,unused-import
Expand Down
2 changes: 1 addition & 1 deletion jobfunnel/config/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def validate(self) -> None:
try:
# try to create an IPv4 address
ipaddress.IPv4Address(self.ip_address)
except:
except Exception:
raise ValueError(f"{self.ip_address} is not a valid IPv4 address")
assert isinstance(self.port, int), "Port must be an integer"
assert self.protocol, "Protocol is not set"
7 changes: 4 additions & 3 deletions jobfunnel/config/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
"""

from typing import List, Optional

from jobfunnel.config import BaseConfig
from jobfunnel.resources import Locale, Provider, Remoteness
from jobfunnel.resources.defaults import (
DEFAULT_SEARCH_RADIUS,
DEFAULT_MAX_LISTING_DAYS,
DEFAULT_DOMAIN_FROM_LOCALE,
DEFAULT_MAX_LISTING_DAYS,
DEFAULT_SEARCH_RADIUS,
)


Expand Down Expand Up @@ -65,7 +66,7 @@ def __init__(

# Try to infer the domain string based on the locale.
if not domain:
if not self.locale in DEFAULT_DOMAIN_FROM_LOCALE:
if self.locale not in DEFAULT_DOMAIN_FROM_LOCALE:
raise ValueError(f"Unknown domain for locale: {self.locale}")
self.domain = DEFAULT_DOMAIN_FROM_LOCALE[self.locale]
else:
Expand Down
Loading

1 comment on commit 493af1b

@anoduck
Copy link

@anoduck anoduck commented on 493af1b Nov 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot of changes there, but considering how long it has been, quite understandable.

Please sign in to comment.