From 6ede8622cc08a5712d22a8b77c3c0c2a14fc1a06 Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Thu, 5 Oct 2023 11:43:00 -0700 Subject: [PATCH] - pending listing support - removal of pending_or_contingent param --- README.md | 11 ++----- homeharvest/__init__.py | 3 -- homeharvest/cli.py | 10 +----- homeharvest/core/scrapers/__init__.py | 2 -- homeharvest/core/scrapers/models.py | 1 + homeharvest/core/scrapers/realtor/__init__.py | 24 +++++++++----- pyproject.toml | 2 +- tests/test_realtor.py | 33 +++++++++++++++++-- 8 files changed, 53 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 77aaf3b..de40c53 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,8 @@ filename = f"HomeHarvest_{current_timestamp}.csv" properties = scrape_property( location="San Diego, CA", - listing_type="sold", # or (for_sale, for_rent) + listing_type="sold", # or (for_sale, for_rent, pending) past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent) - # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings # mls_only=True, # only fetch MLS listings # proxy="http://user:pass@host:port" # use a proxy to change your IP address ) @@ -69,7 +68,7 @@ positional arguments: location Location to scrape (e.g., San Francisco, CA) options: - -l {for_sale,for_rent,sold}, --listing_type {for_sale,for_rent,sold} + -l {for_sale,for_rent,sold,pending}, --listing_type {for_sale,for_rent,sold,pending} Listing type to scrape -o {excel,csv}, --output {excel,csv} Output format @@ -81,9 +80,6 @@ options: -r RADIUS, --radius RADIUS Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses. -m, --mls_only If set, fetches only MLS listings. - -c, --pending_or_contingent - If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches. - ``` ```bash homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest @@ -110,6 +106,7 @@ Required - 'for_rent' - 'for_sale' - 'sold' + - 'pending' Optional ├── radius (decimal): Radius in miles to find comparable properties based on individual addresses. @@ -117,8 +114,6 @@ Optional │ ├── past_days (integer): Number of past days to filter properties. Utilizes 'last_sold_date' for 'sold' listing types, and 'list_date' for others (for_rent, for_sale). │ Example: 30 (fetches properties listed/sold in the last 30 days) -| -├── pending_or_contingent (True/False): If set, fetches only pending or contingent listings. Only applicable for `for_sale listings` from general area searches. │ ├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings) │ diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 9db3f69..da478fb 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -13,7 +13,6 @@ def scrape_property( radius: float = None, mls_only: bool = False, past_days: int = None, - pending_or_contingent: bool = False, proxy: str = None, ) -> pd.DataFrame: """ @@ -23,7 +22,6 @@ def scrape_property( :param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses. :param mls_only: If set, fetches only listings with MLS IDs. :param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days. - :param pending_or_contingent: If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches. :param proxy: Proxy to use for scraping """ validate_input(listing_type) @@ -35,7 +33,6 @@ def scrape_property( radius=radius, mls_only=mls_only, last_x_days=past_days, - pending_or_contingent=pending_or_contingent, ) site = RealtorScraper(scraper_input) diff --git a/homeharvest/cli.py b/homeharvest/cli.py index 881572f..950c1e4 100644 --- a/homeharvest/cli.py +++ b/homeharvest/cli.py @@ -14,7 +14,7 @@ def main(): "--listing_type", type=str, default="for_sale", - choices=["for_sale", "for_rent", "sold"], + choices=["for_sale", "for_rent", "sold", "pending"], help="Listing type to scrape", ) @@ -60,13 +60,6 @@ def main(): help="If set, fetches only MLS listings.", ) - parser.add_argument( - "-c", - "--pending_or_contingent", - action="store_true", - help="If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.", - ) - args = parser.parse_args() result = scrape_property( @@ -76,7 +69,6 @@ def main(): proxy=args.proxy, mls_only=args.mls_only, past_days=args.days, - pending_or_contingent=args.pending_or_contingent, ) if not args.filename: diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index f8a108e..2871e2d 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -11,7 +11,6 @@ class ScraperInput: mls_only: bool | None = None proxy: str | None = None last_x_days: int | None = None - pending_or_contingent: bool | None = None class Scraper: @@ -37,7 +36,6 @@ def __init__( self.radius = scraper_input.radius self.last_x_days = scraper_input.last_x_days self.mls_only = scraper_input.mls_only - self.pending_or_contingent = scraper_input.pending_or_contingent def search(self) -> list[Property]: ... diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index a8ae258..c8633fd 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -19,6 +19,7 @@ def get_by_value(cls, value): class ListingType(Enum): FOR_SALE = "FOR_SALE" FOR_RENT = "FOR_RENT" + PENDING = "PENDING" SOLD = "SOLD" diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 532efbb..f494441 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -18,7 +18,6 @@ class RealtorScraper(Scraper): ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest" def __init__(self, scraper_input): - self.counter = 1 super().__init__(scraper_input) def handle_location(self): @@ -274,6 +273,10 @@ def general_search( last_sold_date list_price price_per_sqft + flags { + is_contingent + is_pending + } description { sqft beds @@ -335,17 +338,19 @@ def general_search( pending_or_contingent_param = ( "or_filters: { contingent: true, pending: true }" - if self.pending_or_contingent + if self.listing_type == ListingType.PENDING else "" ) + listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type + if search_type == "comps": #: comps search, came from an address query = """query Property_search( $coordinates: [Float]! $radius: String! $offset: Int!, ) { - property_search( + home_search( query: { nearby: { coordinates: $coordinates @@ -353,13 +358,15 @@ def general_search( } status: %s %s + %s } %s limit: 200 offset: $offset ) %s""" % ( - self.listing_type.value.lower(), + listing_type.value.lower(), date_param, + pending_or_contingent_param, sort_param, results_query, ) @@ -385,7 +392,7 @@ def general_search( limit: 200 offset: $offset ) %s""" % ( - self.listing_type.value.lower(), + listing_type.value.lower(), date_param, pending_or_contingent_param, sort_param, @@ -415,7 +422,7 @@ def general_search( response = self.session.post(self.SEARCH_GQL_URL, json=payload) response.raise_for_status() response_json = response.json() - search_key = "home_search" if search_type == "area" else "property_search" + search_key = "home_search" if "home_search" in query else "property_search" properties: list[Property] = [] @@ -430,7 +437,6 @@ def general_search( return {"total": 0, "properties": []} for result in response_json["data"][search_key]["results"]: - self.counter += 1 mls = ( result["source"].get("id") if "source" in result and isinstance(result["source"], dict) @@ -447,13 +453,15 @@ def general_search( and result["location"]["address"].get("coordinate") ) + is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent") + realty_property = Property( mls=mls, mls_id=result["source"].get("listing_id") if "source" in result and isinstance(result["source"], dict) else None, property_url=f"{self.PROPERTY_URL}{result['property_id']}", - status=result["status"].upper(), + status="PENDING" if is_pending else result["status"].upper(), list_price=result["list_price"], list_date=result["list_date"].split("T")[0] if result.get("list_date") diff --git a/pyproject.toml b/pyproject.toml index f85d587..8571dee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.3" +version = "0.3.4" description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest" diff --git a/tests/test_realtor.py b/tests/test_realtor.py index 05fa0c6..77ae071 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -8,12 +8,12 @@ def test_realtor_pending_or_contingent(): pending_or_contingent_result = scrape_property( location="Surprise, AZ", - pending_or_contingent=True, + listing_type="pending" ) regular_result = scrape_property( location="Surprise, AZ", - pending_or_contingent=False, + listing_type="for_sale" ) assert all( @@ -25,6 +25,35 @@ def test_realtor_pending_or_contingent(): assert len(pending_or_contingent_result) != len(regular_result) +def test_realtor_pending_comps(): + pending_comps = scrape_property( + location="2530 Al Lipscomb Way", + radius=5, + past_days=180, + listing_type="pending", + ) + + for_sale_comps = scrape_property( + location="2530 Al Lipscomb Way", + radius=5, + past_days=180, + listing_type="for_sale", + ) + + sold_comps = scrape_property( + location="2530 Al Lipscomb Way", + radius=5, + past_days=180, + listing_type="sold", + ) + + results = [pending_comps, for_sale_comps, sold_comps] + assert all([result is not None for result in results]) + + #: assert all lengths are different + assert len(set([len(result) for result in results])) == len(results) + + def test_realtor_comps(): result = scrape_property( location="2530 Al Lipscomb Way",