Merge branch 'new-search'
tomasbedrich committed May 9, 2015
2 parents 5d930f3 + 9ae5a8d commit 32c3616
Showing 6 changed files with 141 additions and 126 deletions.
6 changes: 3 additions & 3 deletions README.rst
@@ -11,8 +11,8 @@ Features
- **login** to Geocaching.com
- **search** caches

- normal search (up to 200 caches around any point)
- **NEW:** quick search (all caches inside some area)
- normal search (unlimited number of caches from any point)
- quick search (all caches inside some area)

- **load cache** details by WP

@@ -52,7 +52,7 @@ Requirements
~~~~~~~~~~~~

- Python >= 3.0 (3.4 required for running tests)
- MechanicalSoup >= 0.2.0
- MechanicalSoup >= 0.3.1
- geopy >= 1.0.0


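For orientation, a minimal usage sketch of the features listed above, assuming pycaching 3.1.1 from this commit is installed; the login() call and the Point import path do not appear in this diff and are assumptions:

    from pycaching.geocaching import Geocaching
    from pycaching.point import Point  # assumed import path

    g = Geocaching()
    g.login("username", "password")  # assumption: authenticates the session

    # load cache details by waypoint code
    cache = g.load_cache("GC12345")  # hypothetical waypoint
    print(cache.name, cache.cache_type)

    # normal search: a lazy generator of caches around a point
    nearby = list(g.search(Point(50.08, 14.42), limit=10))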
71 changes: 43 additions & 28 deletions pycaching/cache.py
@@ -96,28 +96,27 @@ class Cache(object):
"wirelessbeacon": "Wireless Beacon"
}

# both keys and values are tuples of synonyms
_possible_types = {
# key is cache image url, used for parsing: http://www.geocaching.com/images/WptTypes/[KEY].gif
"2": "Traditional Cache",
"3": "Multi-cache",
"8": "Mystery Cache",
"__8": "Unknown Cache", # same as Mystery, key not used
"5": "Letterbox hybrid",
"6": "Event Cache",
"mega": "Mega-Event Cache",
"giga": "Giga-Event Cache",
"earthcache": "Earthcache",
"137": "Earthcache",
"13": "Cache in Trash out Event",
"11": "Webcam Cache",
"4": "Virtual Cache",
"1858": "Wherigo Cache",
"10Years_32": "Lost and Found Event Cache",
"ape_32": "Project Ape Cache",
"HQ_32": "Groundspeak HQ",
"1304": "GPS Adventures Exhibit",
"4738": "Groundspeak Block Party",
"12": "Locationless (Reverse) Cache",
("2", ): ("Traditional", ),
("3", ): ("Multi-cache", ),
("8", ): ("Mystery", "Unknown", ),
("5", ): ("Letterbox hybrid", ),
("6", ): ("Event", ),
("mega", ): ("Mega-Event", ),
("giga", ): ("Giga-Event", ),
("137", "earthcache", ): ("Earthcache", ),
("13", ): ("Cache in Trash out Event", "CITO", ),
("11", ): ("Webcam", ),
("4", ): ("Virtual", ),
("1858", ): ("Wherigo", ),
("10Years_32", ): ("Lost and Found Event", ),
("ape_32", ): ("Project Ape", ),
("HQ_32", ): ("Groundspeak HQ", ),
("1304", ): ("GPS Adventures Exhibit", ),
("4738", ): ("Groundspeak Block Party", ),
("12", ): ("Locationless (Reverse)", ),
}

_possible_sizes = {
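As an aside, a standalone illustration (not part of this commit) of what the tuple-based mapping above allows: every icon key and display-name synonym can be flattened into a single case-insensitive lookup table pointing at the canonical first name.

    # Trimmed copy of the mapping above, for illustration only.
    _possible_types = {
        ("2",): ("Traditional",),
        ("8",): ("Mystery", "Unknown"),
        ("137", "earthcache"): ("Earthcache",),
    }

    # Flatten icon keys and display-name synonyms to the canonical first name.
    canonical = {}
    for keys, names in _possible_types.items():
        for synonym in keys + names:
            canonical[synonym.lower()] = names[0]

    assert canonical["unknown"] == "Mystery"
    assert canonical["earthcache"] == "Earthcache"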
@@ -225,14 +224,30 @@ def cache_type(self):

@cache_type.setter
def cache_type(self, cache_type):
cache_type = cache_type.replace(" Geocache", "") # with space!
cache_type = cache_type.replace(" Cache", "") # with space!
cache_type = cache_type.strip()
cache_type = cache_type.replace("Geocache", "Cache")
if cache_type in self._possible_types.values(): # try to search in values
self._cache_type = cache_type
elif cache_type in self._possible_types.keys(): # not in values => it must be a key
self._cache_type = self._possible_types[cache_type]
else:
raise ValueError("Cache type '{}' is not possible.".format(cache_type))

# walk through each type and its synonyms
for key, value in self._possible_types.items():
for synonym in value:
if cache_type.lower() == synonym.lower():
self._cache_type = self._possible_types[key][0]
return

raise ValueError("Cache type '{}' is not possible.".format(cache_type))

@classmethod
def get_cache_type_by_img(cls, src):
"""Returns cache type by its image src"""
# parse src (http://www.geocaching.com/images/WptTypes/[KEY].gif)
img_name = src.split("/")[-1].rsplit(".", 1)[0]

# walk through each key and its synonyms
for key in cls._possible_types.keys():
for synonym in key:
if img_name == synonym:
return cls._possible_types[key][0]

@property
@lazy_loaded
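A short sketch of the two lookups defined above (assumes the package from this commit is importable and that Geocaching() can be instantiated without arguments, as in the tests; the waypoint is hypothetical):

    from pycaching.cache import Cache
    from pycaching.geocaching import Geocaching

    # classmethod: resolve the canonical type from the icon filename in an img src
    print(Cache.get_cache_type_by_img(
        "http://www.geocaching.com/images/WptTypes/2.gif"))  # "Traditional"

    # setter: any synonym is accepted case-insensitively, the canonical name is stored
    c = Cache("GC12345", Geocaching())
    c.cache_type = "unknown"
    print(c.cache_type)  # "Mystery"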
@@ -311,7 +326,7 @@ def hidden(self, hidden):
if type(hidden) is str:
hidden = Util.parse_date(hidden)
elif type(hidden) is not datetime.date:
raise ValueError("Passed object is not datetime.date instance nor string containing date.")
raise ValueError("Passed object is not datetime.date instance nor string containing a date.")
self._hidden = hidden

@property
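The hidden setter above accepts either a date string or a datetime.date; a small sketch mirroring the tests (same assumptions as above, waypoint hypothetical):

    import datetime
    from pycaching.cache import Cache
    from pycaching.geocaching import Geocaching

    c = Cache("GC12345", Geocaching())
    c.hidden = "1/30/2000"  # strings are parsed via Util.parse_date
    assert c.hidden == datetime.date(2000, 1, 30)

    try:
        c.hidden = None  # neither str nor datetime.date
    except ValueError:
        print("rejected as expected")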
154 changes: 67 additions & 87 deletions pycaching/geocaching.py
@@ -3,9 +3,9 @@
import logging
import math
import requests
import bs4
import mechanicalsoup as ms
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from pycaching.area import Area
from pycaching.cache import Cache
from pycaching.util import Util
@@ -35,7 +35,8 @@ class Geocaching(object):
_urls = {
"login_page": _baseurl + "login/default.aspx",
"cache_details": _baseurl + "geocache/{wp}",
"caches_nearest": _baseurl + "seek/nearest.aspx",
"search": _baseurl + "play/search",
"search_more": _baseurl + "play/search/more-results",
"geocode": _baseurl + "api/geocode",
"map": _tile_url + "map.details",
"tile": _tile_url + "map.png",
@@ -132,105 +133,84 @@ def search(self, point, limit=0):

logging.info("Searching at %s...", point)

page_num = 1
cache_num = 0
start_index = 0
while True:
try: # try to load search page
page = self._search_get_page(point, page_num)
except requests.exceptions.ConnectionError as e:
raise StopIteration("Cannot load search page.") from e

for cache in page:
yield cache

cache_num += 1
if limit > 0 and cache_num >= limit:
raise StopIteration()

page_num += 1

@login_needed
def _search_get_page(self, point, page_num):
"""Returns one page of caches as a list.
# get one page
page = self._search_get_page(point, start_index)

Searches for a caches around a point and returns N-th page (specifiend by page argument)."""

assert isinstance(point, Point)
assert type(page_num) is int

logging.info("Fetching page %d.", page_num)

# assemble request
params = urlencode({"lat": point.latitude, "lng": point.longitude})
url = self._urls["caches_nearest"] + "?" + params

# we have to add POST for other pages than 1st
if page_num == 1:
post = None
else:
# TODO handle searching on second page without first
post = self._pagging_helpers
post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
post["__EVENTARGUMENT"] = ""
if not page:
# result is empty - no more caches
raise StopIteration()

# make request
try:
root = self._browser.post(url, post).soup
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search page #{}.".format(page_num)) from e
# parse caches in result
for start_index, row in enumerate(BeautifulSoup(page).find_all("tr"), start_index):

# root of a few following elements
widget_general = root.find_all("td", "PageBuilderWidget")
if limit > 0 and start_index == limit:
raise StopIteration()

# parse pagging widget
caches_total, page_num, page_count = [int(elm.text) for elm in widget_general[0].find_all("b")]
logging.debug("Found %d results. Showing page %d of %d.", caches_total, page_num, page_count)
# parse raw data
cache_details = row.find("span", "cache-details").text.split("|")
wp = cache_details[1].strip()

# create and fill cache object
c = Cache(wp, self)
c.cache_type = cache_details[0].strip()
c.name = row.find("span", "cache-name").text
c.found = row.find("img", title="Found It!") is not None
c.favorites = int(row.find(attrs={"data-column": "FavoritePoint"}).text)
c.state = not (row.get("class") and "disabled" in row.get("class"))
c.pm_only = row.find("td", "pm-upsell") is not None

if c.pm_only:
# PM-only caches don't have the other attributes filled in
yield c
continue

# save search postbacks for future usage
if page_num == 1:
pagging_links = [_ for _ in widget_general[1].find_all("a") if _.get("id")]
self._pagging_postbacks = {int(link.text): link.get("href").split("'")[1] for link in pagging_links}
c.size = row.find(attrs={"data-column": "ContainerSize"}).text
c.difficulty = float(row.find(attrs={"data-column": "Difficulty"}).text)
c.terrain = float(row.find(attrs={"data-column": "Terrain"}).text)
c.hidden = Util.parse_date(row.find(attrs={"data-column": "PlaceDate"}).text)
c.author = row.find("span", "owner").text[3:] # delete "by "

# other nescessary fields
self._pagging_helpers = {field["name"]: field["value"] for field in root.find_all("input", type="hidden")}
logging.debug("Cache parsed: %s", c)
yield c

# parse results table
data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
return [self._search_parse_cache(c) for c in data]
start_index += 1

@login_needed
def _search_parse_cache(self, root):
"""Returns a Cache object parsed from BeautifulSoup Tag."""
def _search_get_page(self, point, start_index):

assert isinstance(root, bs4.Tag)
logging.debug("Loading page from start_index: %d", start_index)

# parse raw data
favorites = root.find("span", "favorite-rank")
typeLink, nameLink = root.find_all("a", "lnk")
pm_only = root.find("img", title="Premium Member Only Cache") is not None
direction, info, D_T, placed, last_found = root.find_all("span", "small")
found = root.find("img", title="Found It!") is not None
size = root.find("td", "AlignCenter").find("img")
author, wp, area = [t.strip() for t in info.text.split("|")]
if start_index == 0:
# first request has to load normal search page
logging.debug("Using normal search endpoint")

# create cache object
c = Cache(wp, self)
params = urlencode({"origin": point.format(None, "", "", "")})
url = self._urls["search"] + "?" + params

# prettify data
c.cache_type = typeLink.find("img").get(
"src").split("/")[-1].rsplit(".", 1)[0] # filename of img[src]
c.name = nameLink.span.text.strip()
c.found = found
c.state = "Strike" not in nameLink.get("class")
c.size = size.get("src").split("/")[-1].rsplit(".", 1)[0] # filename of img[src]
c.difficulty, c.terrain = list(map(float, D_T.text.split("/")))
c.hidden = Util.parse_date(placed.text)
c.author = author[3:] # delete "by "
c.favorites = int(favorites.text)
c.pm_only = pm_only
# make request
try:
return str(self._browser.get(url).soup.find(id="geocaches"))
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search results.") from e

logging.debug("Cache parsed: %s", c)
return c
else:
# other requests can use AJAX endpoint
logging.debug("Using AJAX search endpoint")

params = urlencode({
"inputOrigin": point.format(None, "", "", ""),
"startIndex": start_index,
"originTreatment": 0
})
url = self._urls["search_more"] + "?" + params

# make request
try:
return self._browser.get(url).json()["HtmlString"].strip()
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search results.") from e

def search_quick(self, area, precision=None, strict=False):
"""Get geocaches inside area, with approximate coordinates
@@ -483,7 +463,7 @@ def load_cache(self, wp, destination=None):

# prettify data
c.name = name.text
c.cache_type = cache_type.split("/")[-1].rsplit(".", 1)[0]
c.cache_type = Cache.get_cache_type_by_img(cache_type)
c.author = author.text
c.hidden = Util.parse_date(hidden.text.split(":")[-1])
c.location = Point.from_string(location.text)
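A sketch of consuming the rewritten streaming search, including the premium-member-only handling added above (assumes the package from this commit; the login() call and the Point import path are assumptions):

    from pycaching.geocaching import Geocaching
    from pycaching.point import Point  # assumed import path

    g = Geocaching()
    g.login("username", "password")  # assumption

    for c in g.search(Point(49.74, 13.38), limit=25):
        if c.pm_only:
            # premium-only rows only carry type, name, found, favorites and state
            print(c.wp, c.name, "(premium members only)")
            continue
        print(c.wp, c.name, c.cache_type, c.size, c.difficulty, c.terrain, c.favorites)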
4 changes: 2 additions & 2 deletions setup.py
@@ -9,7 +9,7 @@

info = {
"name": "pycaching",
"version": "3.1", # PEP 386
"version": "3.1.1", # PEP 386
"author": "Tomas Bedrich",
"author_email": "ja@tbedrich.cz",
"url": "https://github.com/tomasbedrich/pycaching",
@@ -19,7 +19,7 @@
"description": "Geocaching.com site crawler. Provides tools for searching, fetching caches and geocoding.",
"long_description": long_description,
"keywords": ["geocaching", "crawler", "geocache", "cache", "searching", "geocoding"],
"install_requires": ["MechanicalSoup >= 0.2.0", "geopy >= 1.0.0"],
"install_requires": ["MechanicalSoup >= 0.3.0", "geopy >= 1.0.0"],
"test_suite": "test"
}

22 changes: 19 additions & 3 deletions test/test_cache.py
@@ -23,6 +23,10 @@ def test___str__(self):
def test___eq__(self):
self.assertEqual(self.c, Cache("GC12345", self.gc))

def test_geocaching(self):
with self.assertRaises(ValueError):
Cache("GC12345", None)

def test_wp(self):
self.assertEqual(self.c.wp, "GC12345")

@@ -34,7 +38,7 @@ def test_name(self):
self.assertEqual(self.c.name, "Testing")

def test_type(self):
self.assertEqual(self.c.cache_type, "Traditional Cache")
self.assertEqual(self.c.cache_type, "Traditional")

with self.subTest("filter invalid"):
with self.assertRaises(ValueError):
@@ -47,10 +51,14 @@ def test_location(self):
self.c.location = "S 36 51.918 E 174 46.725"
self.assertEqual(self.c.location, Point.from_string("S 36 51.918 E 174 46.725"))

with self.subTest("filter invalid"):
with self.subTest("filter invalid string"):
with self.assertRaises(ValueError):
self.c.location = "somewhere"

with self.subTest("filter invalid types"):
with self.assertRaises(ValueError):
self.c.location = None

def test_state(self):
self.assertEqual(self.c.state, True)

@@ -88,17 +96,25 @@ def test_hidden(self):
self.c.hidden = "1/30/2000"
self.assertEqual(self.c.hidden, date(2000, 1, 30))

with self.subTest("filter invalid"):
with self.subTest("filter invalid string"):
with self.assertRaises(ValueError):
self.c.hidden = "now"

with self.subTest("filter invalid types"):
with self.assertRaises(ValueError):
self.c.hidden = None

def test_attributes(self):
self.assertEqual(self.c.attributes, {"onehour": True, "kids": False, "available": True})

with self.subTest("filter unknown"):
self.c.attributes = {attr: True for attr in ["onehour", "xxx"]}
self.assertEqual(self.c.attributes, {"onehour": True})

with self.subTest("filter invalid"):
with self.assertRaises(ValueError):
self.c.attributes = None

def test_summary(self):
self.assertEqual(self.c.summary, "text")

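Since setup.py declares test_suite = "test", one way to run the updated suite locally (Python >= 3.4 per the README, executed from the repository root) is programmatically via unittest, equivalent to python setup.py test:

    import unittest

    suite = unittest.defaultTestLoader.discover("test")
    unittest.TextTestRunner(verbosity=2).run(suite)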