diff --git a/README.rst b/README.rst
index d6c4fd6..a1fd02b 100644
--- a/README.rst
+++ b/README.rst
@@ -11,8 +11,8 @@ Features

 - **login** to Geocaching.com
 - **search** caches

-  - normal search (up to 200 caches around any point)
-  - **NEW:** quick search (all caches inside some area)
+  - normal search (unlimited number of caches from any point)
+  - quick search (all caches inside some area)

 - **load cache** details by WP
@@ -52,7 +52,7 @@ Requirements
 ~~~~~~~~~~~~

 - Python >= 3.0 (3.4 required for running tests)
-- MechanicalSoup >= 0.2.0
+- MechanicalSoup >= 0.3.1
 - geopy >= 1.0.0
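The README now advertises the paged, unlimited search. For context, a minimal usage sketch of that feature follows; it assumes the module-level `pycaching.login()` shortcut documented in the project README, and the credentials are placeholders:

```python
import pycaching
from pycaching import Point

# log in and lazily iterate search results around a point;
# the generator fetches further result pages on demand
geocaching = pycaching.login("username", "password")  # placeholder credentials
for cache in geocaching.search(Point(49.733867, 13.397091), limit=30):
    print(cache.wp, cache.name)
```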
diff --git a/pycaching/cache.py b/pycaching/cache.py
index 0ad37a7..8ebbdec 100644
--- a/pycaching/cache.py
+++ b/pycaching/cache.py
@@ -96,28 +96,27 @@ class Cache(object):
         "wirelessbeacon": "Wireless Beacon"
     }

+    # both keys and values are tuples of synonyms
     _possible_types = {
         # key is cache image url, used for parsing: http://www.geocaching.com/images/WptTypes/[KEY].gif
-        "2": "Traditional Cache",
-        "3": "Multi-cache",
-        "8": "Mystery Cache",
-        "__8": "Unknown Cache",  # same as Mystery, key not used
-        "5": "Letterbox hybrid",
-        "6": "Event Cache",
-        "mega": "Mega-Event Cache",
-        "giga": "Giga-Event Cache",
-        "earthcache": "Earthcache",
-        "137": "Earthcache",
-        "13": "Cache in Trash out Event",
-        "11": "Webcam Cache",
-        "4": "Virtual Cache",
-        "1858": "Wherigo Cache",
-        "10Years_32": "Lost and Found Event Cache",
-        "ape_32": "Project Ape Cache",
-        "HQ_32": "Groundspeak HQ",
-        "1304": "GPS Adventures Exhibit",
-        "4738": "Groundspeak Block Party",
-        "12": "Locationless (Reverse) Cache",
+        ("2", ): ("Traditional", ),
+        ("3", ): ("Multi-cache", ),
+        ("8", ): ("Mystery", "Unknown", ),
+        ("5", ): ("Letterbox hybrid", ),
+        ("6", ): ("Event", ),
+        ("mega", ): ("Mega-Event", ),
+        ("giga", ): ("Giga-Event", ),
+        ("137", "earthcache", ): ("Earthcache", ),
+        ("13", ): ("Cache in Trash out Event", "CITO", ),
+        ("11", ): ("Webcam", ),
+        ("4", ): ("Virtual", ),
+        ("1858", ): ("Wherigo", ),
+        ("10Years_32", ): ("Lost and Found Event", ),
+        ("ape_32", ): ("Project Ape", ),
+        ("HQ_32", ): ("Groundspeak HQ", ),
+        ("1304", ): ("GPS Adventures Exhibit", ),
+        ("4738", ): ("Groundspeak Block Party", ),
+        ("12", ): ("Locationless (Reverse)", ),
     }

     _possible_sizes = {
@@ -225,14 +224,30 @@ def cache_type(self):

     @cache_type.setter
     def cache_type(self, cache_type):
+        cache_type = cache_type.replace(" Geocache", "")  # with space!
+        cache_type = cache_type.replace(" Cache", "")  # with space!
         cache_type = cache_type.strip()
-        cache_type = cache_type.replace("Geocache", "Cache")
-        if cache_type in self._possible_types.values():  # try to search in values
-            self._cache_type = cache_type
-        elif cache_type in self._possible_types.keys():  # not in values => it must be a key
-            self._cache_type = self._possible_types[cache_type]
-        else:
-            raise ValueError("Cache type '{}' is not possible.".format(cache_type))
+
+        # walk through each type and its synonyms
+        for key, value in self._possible_types.items():
+            for synonym in value:
+                if cache_type.lower() == synonym.lower():
+                    self._cache_type = value[0]  # normalize to the first (canonical) synonym
+                    return
+
+        raise ValueError("Cache type '{}' is not possible.".format(cache_type))
+
+    @classmethod
+    def get_cache_type_by_img(cls, src):
+        """Returns cache type parsed from its image src URL, or None if unknown."""
+        # parse src (http://www.geocaching.com/images/WptTypes/[KEY].gif)
+        img_name = src.split("/")[-1].rsplit(".", 1)[0]

+        # walk through each key and its synonyms
+        for key in cls._possible_types.keys():
+            for synonym in key:
+                if img_name == synonym:
+                    return cls._possible_types[key][0]

     @property
     @lazy_loaded
@@ -311,7 +326,7 @@ def hidden(self, hidden):
         if type(hidden) is str:
             hidden = Util.parse_date(hidden)
         elif type(hidden) is not datetime.date:
-            raise ValueError("Passed object is not datetime.date instance nor string containing date.")
+            raise ValueError("Passed object is not datetime.date instance nor string containing a date.")
         self._hidden = hidden

     @property
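To illustrate the new tuple-keyed mapping: both lookups scan tuples of synonyms, so any synonym normalizes to the first (canonical) value. A self-contained sketch of the same logic, using a trimmed copy of the table for illustration only:

```python
# trimmed copy of Cache._possible_types, for illustration only
POSSIBLE_TYPES = {
    ("2",): ("Traditional",),
    ("8",): ("Mystery", "Unknown"),
    ("137", "earthcache"): ("Earthcache",),
}

def normalize_type(name):
    """Map any synonym (case-insensitively) to its canonical name."""
    for synonyms in POSSIBLE_TYPES.values():
        if name.lower() in (s.lower() for s in synonyms):
            return synonyms[0]
    raise ValueError("Cache type '{}' is not possible.".format(name))

def type_by_img(src):
    """Map an icon URL like .../WptTypes/137.gif to a canonical name."""
    img_name = src.split("/")[-1].rsplit(".", 1)[0]
    for keys, values in POSSIBLE_TYPES.items():
        if img_name in keys:
            return values[0]  # falls through to None if the image name is unknown

assert normalize_type("unknown") == "Mystery"
assert type_by_img("http://www.geocaching.com/images/WptTypes/earthcache.gif") == "Earthcache"
```

This also explains why the old `"__8": "Unknown Cache"` placeholder key could be dropped: "Unknown" is now just a second synonym under the `("8", )` key.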
diff --git a/pycaching/geocaching.py b/pycaching/geocaching.py
index 2c41fa8..52def6f 100644
--- a/pycaching/geocaching.py
+++ b/pycaching/geocaching.py
@@ -3,9 +3,9 @@
 import logging
 import math
 import requests
-import bs4
 import mechanicalsoup as ms
 from urllib.parse import urlencode
+from bs4 import BeautifulSoup
 from pycaching.area import Area
 from pycaching.cache import Cache
 from pycaching.util import Util
@@ -35,7 +35,8 @@ class Geocaching(object):
     _urls = {
         "login_page": _baseurl + "login/default.aspx",
         "cache_details": _baseurl + "geocache/{wp}",
-        "caches_nearest": _baseurl + "seek/nearest.aspx",
+        "search": _baseurl + "play/search",
+        "search_more": _baseurl + "play/search/more-results",
         "geocode": _baseurl + "api/geocode",
         "map": _tile_url + "map.details",
         "tile": _tile_url + "map.png",
@@ -132,105 +133,84 @@ def search(self, point, limit=0):

         logging.info("Searching at %s...", point)

-        page_num = 1
-        cache_num = 0
+        start_index = 0
         while True:
-            try:  # try to load search page
-                page = self._search_get_page(point, page_num)
-            except requests.exceptions.ConnectionError as e:
-                raise StopIteration("Cannot load search page.") from e
-
-            for cache in page:
-                yield cache
-
-                cache_num += 1
-                if limit > 0 and cache_num >= limit:
-                    raise StopIteration()
-
-            page_num += 1
-
-    @login_needed
-    def _search_get_page(self, point, page_num):
-        """Returns one page of caches as a list.
+            # get one page
+            page = self._search_get_page(point, start_index)

-        Searches for a caches around a point and returns N-th page (specifiend by page argument)."""
-
-        assert isinstance(point, Point)
-        assert type(page_num) is int
-
-        logging.info("Fetching page %d.", page_num)
-
-        # assemble request
-        params = urlencode({"lat": point.latitude, "lng": point.longitude})
-        url = self._urls["caches_nearest"] + "?" + params
-
-        # we have to add POST for other pages than 1st
-        if page_num == 1:
-            post = None
-        else:
-            # TODO handle searching on second page without first
-            post = self._pagging_helpers
-            post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
-            post["__EVENTARGUMENT"] = ""
+            if not page:
+                # result is empty - no more caches
+                return

-        # make request
-        try:
-            root = self._browser.post(url, post).soup
-        except requests.exceptions.ConnectionError as e:
-            raise Error("Cannot load search page #{}.".format(page_num)) from e
+            # parse caches in result
+            for start_index, row in enumerate(BeautifulSoup(page, "html.parser").find_all("tr"), start_index):

-        # root of a few following elements
-        widget_general = root.find_all("td", "PageBuilderWidget")
+                if limit > 0 and start_index == limit:
+                    return

-        # parse pagging widget
-        caches_total, page_num, page_count = [int(elm.text) for elm in widget_general[0].find_all("b")]
-        logging.debug("Found %d results. Showing page %d of %d.", caches_total, page_num, page_count)
+                # parse raw data
+                cache_details = row.find("span", "cache-details").text.split("|")
+                wp = cache_details[1].strip()
+
+                # create and fill cache object
+                c = Cache(wp, self)
+                c.cache_type = cache_details[0].strip()
+                c.name = row.find("span", "cache-name").text
+                c.found = row.find("img", title="Found It!") is not None
+                c.favorites = int(row.find(attrs={"data-column": "FavoritePoint"}).text)
+                c.state = not (row.get("class") and "disabled" in row.get("class"))
+                c.pm_only = row.find("td", "pm-upsell") is not None
+
+                if c.pm_only:
+                    # PM-only caches don't have the other attributes filled in
+                    yield c
+                    continue

-        # save search postbacks for future usage
-        if page_num == 1:
-            pagging_links = [_ for _ in widget_general[1].find_all("a") if _.get("id")]
-            self._pagging_postbacks = {int(link.text): link.get("href").split("'")[1] for link in pagging_links}
+                c.size = row.find(attrs={"data-column": "ContainerSize"}).text
+                c.difficulty = float(row.find(attrs={"data-column": "Difficulty"}).text)
+                c.terrain = float(row.find(attrs={"data-column": "Terrain"}).text)
+                c.hidden = Util.parse_date(row.find(attrs={"data-column": "PlaceDate"}).text)
+                c.author = row.find("span", "owner").text[3:]  # delete "by "

-        # other nescessary fields
-        self._pagging_helpers = {field["name"]: field["value"] for field in root.find_all("input", type="hidden")}
+                logging.debug("Cache parsed: %s", c)
+                yield c

-        # parse results table
-        data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
-        return [self._search_parse_cache(c) for c in data]
+            start_index += 1

     @login_needed
-    def _search_parse_cache(self, root):
-        """Returns a Cache object parsed from BeautifulSoup Tag."""
+    def _search_get_page(self, point, start_index):

-        assert isinstance(root, bs4.Tag)
+        logging.debug("Loading page from start_index: %d", start_index)

-        # parse raw data
-        favorites = root.find("span", "favorite-rank")
-        typeLink, nameLink = root.find_all("a", "lnk")
-        pm_only = root.find("img", title="Premium Member Only Cache") is not None
-        direction, info, D_T, placed, last_found = root.find_all("span", "small")
-        found = root.find("img", title="Found It!") is not None
-        size = root.find("td", "AlignCenter").find("img")
-        author, wp, area = [t.strip() for t in info.text.split("|")]
+        if start_index == 0:
+            # first request has to load normal search page
+            logging.debug("Using normal search endpoint")

-        # create cache object
-        c = Cache(wp, self)
+            params = urlencode({"origin": point.format(None, "", "", "")})
+            url = self._urls["search"] + "?" + params

-        # prettify data
-        c.cache_type = typeLink.find("img").get(
-            "src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.name = nameLink.span.text.strip()
-        c.found = found
-        c.state = "Strike" not in nameLink.get("class")
-        c.size = size.get("src").split("/")[-1].rsplit(".", 1)[0]  # filename of img[src]
-        c.difficulty, c.terrain = list(map(float, D_T.text.split("/")))
-        c.hidden = Util.parse_date(placed.text)
-        c.author = author[3:]  # delete "by "
-        c.favorites = int(favorites.text)
-        c.pm_only = pm_only
+            # make request
+            try:
+                return str(self._browser.get(url).soup.find(id="geocaches"))
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e

-        logging.debug("Cache parsed: %s", c)
-        return c
+        else:
+            # other requests can use AJAX endpoint
+            logging.debug("Using AJAX search endpoint")
+
+            params = urlencode({
+                "inputOrigin": point.format(None, "", "", ""),
+                "startIndex": start_index,
+                "originTreatment": 0
+            })
+            url = self._urls["search_more"] + "?" + params
+
+            # make request
+            try:
+                return self._browser.get(url).json()["HtmlString"].strip()
+            except requests.exceptions.ConnectionError as e:
+                raise Error("Cannot load search results.") from e

     def search_quick(self, area, precision=None, strict=False):
         """Get geocaches inside area, with approximate coordinates
@@ -483,7 +463,7 @@ def load_cache(self, wp, destination=None):

         # prettify data
         c.name = name.text
-        c.cache_type = cache_type.split("/")[-1].rsplit(".", 1)[0]
+        c.cache_type = Cache.get_cache_type_by_img(cache_type)
         c.author = author.text
         c.hidden = Util.parse_date(hidden.text.split(":")[-1])
         c.location = Point.from_string(location.text)
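The rewritten search walks two endpoints: the first batch of rows is scraped from the regular play/search page, and every later batch comes from the more-results AJAX endpoint, which returns the same table rows wrapped in JSON. A standalone sketch of that pagination pattern, assuming a plain requests.Session in place of the MechanicalSoup browser and with the row counting simplified:

```python
import requests

def paged_rows(session, origin):
    """Yield raw batches of result-row HTML, page by page (sketch only)."""
    start_index = 0
    while True:
        if start_index == 0:
            # first batch: scrape the normal search page
            resp = session.get("https://www.geocaching.com/play/search",
                               params={"origin": origin})
            html = resp.text  # the real code keeps only the element with id="geocaches"
        else:
            # later batches: AJAX endpoint wraps the rows in a JSON envelope
            resp = session.get("https://www.geocaching.com/play/search/more-results",
                               params={"inputOrigin": origin,
                                       "startIndex": start_index,
                                       "originTreatment": 0})
            html = resp.json()["HtmlString"].strip()

        rows = html.count("<tr")  # the real code enumerates parsed rows instead
        if not rows:
            return  # empty batch means there are no more caches
        yield html
        start_index += rows
```

The startIndex bookkeeping mirrors the generator above: each request starts where the last parsed row left off, so no postback state has to be carried between pages, which is exactly what the deleted `_pagging_*` helpers used to do.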
diff --git a/setup.py b/setup.py
index 0d832cf..b9652ba 100755
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 info = {
     "name": "pycaching",
-    "version": "3.1",  # PEP 386
+    "version": "3.1.1",  # PEP 386
     "author": "Tomas Bedrich",
     "author_email": "ja@tbedrich.cz",
     "url": "https://github.com/tomasbedrich/pycaching",
@@ -19,7 +19,7 @@
     "description": "Geocaching.com site crawler. Provides tools for searching, fetching caches and geocoding.",
     "long_description": long_description,
     "keywords": ["geocaching", "crawler", "geocache", "cache", "searching", "geocoding"],
-    "install_requires": ["MechanicalSoup >= 0.2.0", "geopy >= 1.0.0"],
+    "install_requires": ["MechanicalSoup >= 0.3.1", "geopy >= 1.0.0"],
     "test_suite": "test"
 }
diff --git a/test/test_cache.py b/test/test_cache.py
index 07cea2e..cde2767 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -23,6 +23,10 @@ def test___str__(self):
     def test___eq__(self):
         self.assertEqual(self.c, Cache("GC12345", self.gc))

+    def test_geocaching(self):
+        with self.assertRaises(ValueError):
+            Cache("GC12345", None)
+
     def test_wp(self):
         self.assertEqual(self.c.wp, "GC12345")

@@ -34,7 +38,7 @@ def test_name(self):
         self.assertEqual(self.c.name, "Testing")

     def test_type(self):
-        self.assertEqual(self.c.cache_type, "Traditional Cache")
+        self.assertEqual(self.c.cache_type, "Traditional")

         with self.subTest("filter invalid"):
             with self.assertRaises(ValueError):
@@ -47,10 +51,14 @@ def test_location(self):
         self.c.location = "S 36 51.918 E 174 46.725"
         self.assertEqual(self.c.location, Point.from_string("S 36 51.918 E 174 46.725"))

-        with self.subTest("filter invalid"):
+        with self.subTest("filter invalid string"):
             with self.assertRaises(ValueError):
                 self.c.location = "somewhere"

+        with self.subTest("filter invalid types"):
+            with self.assertRaises(ValueError):
+                self.c.location = None
+
     def test_state(self):
         self.assertEqual(self.c.state, True)

@@ -88,10 +96,14 @@ def test_hidden(self):
         self.c.hidden = "1/30/2000"
         self.assertEqual(self.c.hidden, date(2000, 1, 30))

-        with self.subTest("filter invalid"):
+        with self.subTest("filter invalid string"):
             with self.assertRaises(ValueError):
                 self.c.hidden = "now"

+        with self.subTest("filter invalid types"):
+            with self.assertRaises(ValueError):
+                self.c.hidden = None
+
     def test_attributes(self):
         self.assertEqual(self.c.attributes, {"onehour": True, "kids": False, "available": True})

@@ -99,6 +111,10 @@ def test_attributes(self):
         self.c.attributes = {attr: True for attr in ["onehour", "xxx"]}
         self.assertEqual(self.c.attributes, {"onehour": True})

+        with self.subTest("filter invalid"):
+            with self.assertRaises(ValueError):
+                self.c.attributes = None
+
     def test_summary(self):
         self.assertEqual(self.c.summary, "text")
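The new "filter invalid types" subtests pin down that the setters raise ValueError for wrong types instead of silently storing None. The property pattern they exercise looks roughly like this (a sketch only; the real code uses Util.parse_date, for which datetime.strptime stands in here):

```python
import datetime

class Example:

    @property
    def hidden(self):
        return self._hidden

    @hidden.setter
    def hidden(self, hidden):
        # strings are parsed, dates pass through, everything else is rejected
        if isinstance(hidden, str):
            hidden = datetime.datetime.strptime(hidden, "%m/%d/%Y").date()
        elif not isinstance(hidden, datetime.date):
            raise ValueError(
                "Passed object is not datetime.date instance nor string containing a date.")
        self._hidden = hidden

e = Example()
e.hidden = "1/30/2000"
assert e.hidden == datetime.date(2000, 1, 30)
try:
    e.hidden = None
except ValueError:
    pass  # rejected, as the new test expects
```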
diff --git a/test/test_geocaching.py b/test/test_geocaching.py
index a7cdb67..54f5cb6 100644
--- a/test/test_geocaching.py
+++ b/test/test_geocaching.py
@@ -4,7 +4,7 @@
 import unittest
 import pycaching
 from geopy.distance import great_circle
-from pycaching.errors import LoginFailedException, GeocodeError, PMOnlyException
+from pycaching.errors import NotLoggedInException, LoginFailedException, GeocodeError, PMOnlyException
 from pycaching import Geocaching
 from pycaching import Cache
 from pycaching import Point
@@ -57,8 +57,8 @@ def test_search(self):
             self.assertIn(cache.wp, expected)

         with self.subTest("pagging"):
-            caches = list(self.g.search(Point(49.733867, 13.397091), 25))
-            self.assertNotEqual(caches[0], caches[20])
+            caches = list(self.g.search(Point(49.733867, 13.397091), 100))
+            self.assertNotEqual(caches[0], caches[50])

     def test_search_quick(self):
         """Perform search and check found caches"""
@@ -242,6 +242,10 @@ def test_login(self):
         with self.assertRaises(LoginFailedException):
             self.g.login("", "")

+    def test_login_needed(self):
+        with self.assertRaises(NotLoggedInException):
+            self.g.load_cache("GC41FJC")
+
     def test_get_logged_user(self):
         self.g.login(_username, _password)
         self.assertEqual(self.g.get_logged_user(), _username)
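The added test_login_needed expects load_cache to fail fast with NotLoggedInException before any network traffic when nobody is logged in. The guard is typically a small decorator; a minimal sketch of the pattern (not the exact pycaching source):

```python
import functools

class NotLoggedInException(Exception):
    pass

def login_needed(func):
    """Raise NotLoggedInException unless the instance has logged in."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        if not getattr(self, "_logged_in", False):
            raise NotLoggedInException("Login is needed.")
        return func(self, *args, **kwargs)
    return wrapper

class Geocaching:

    def login(self, username, password):
        self._logged_in = True  # the real method performs the site login

    @login_needed
    def load_cache(self, wp):
        return "cache {}".format(wp)
```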