Merge branch 'new-search'
tomasbedrich committed May 9, 2015
2 parents 5d930f3 + 9ae5a8d commit 32c3616
Showing 6 changed files with 141 additions and 126 deletions.
6 changes: 3 additions & 3 deletions README.rst
@@ -11,8 +11,8 @@ Features
- **login** to Geocaching.com
- **search** caches

- normal search (up to 200 caches around any point)
- **NEW:** quick search (all caches inside some area)
- normal search (unlimited number of caches from any point)
- quick search (all caches inside some area)

- **load cache** details by WP

@@ -52,7 +52,7 @@ Requirements
~~~~~~~~~~~~

- Python >= 3.0 (3.4 required for running tests)
- MechanicalSoup >= 0.2.0
- MechanicalSoup >= 0.3.1
- geopy >= 1.0.0


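For orientation, a minimal usage sketch of the features listed above, assuming pycaching 3.1.1 from this commit is installed; the login() call and the Point import path do not appear in this diff and are assumptions:

    from pycaching.geocaching import Geocaching
    from pycaching.point import Point  # assumed import path

    g = Geocaching()
    g.login("username", "password")  # assumption: authenticates the session

    # load cache details by waypoint code
    cache = g.load_cache("GC12345")  # hypothetical waypoint
    print(cache.name, cache.cache_type)

    # normal search: a lazy generator of caches around a point
    nearby = list(g.search(Point(50.08, 14.42), limit=10))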
71 changes: 43 additions & 28 deletions pycaching/cache.py
@@ -96,28 +96,27 @@ class Cache(object):
"wirelessbeacon": "Wireless Beacon"
}

# both keys and values are tuples of synonyms
_possible_types = {
# key is cache image url, used for parsing: http://www.geocaching.com/images/WptTypes/[KEY].gif
"2": "Traditional Cache",
"3": "Multi-cache",
"8": "Mystery Cache",
"__8": "Unknown Cache", # same as Mystery, key not used
"5": "Letterbox hybrid",
"6": "Event Cache",
"mega": "Mega-Event Cache",
"giga": "Giga-Event Cache",
"earthcache": "Earthcache",
"137": "Earthcache",
"13": "Cache in Trash out Event",
"11": "Webcam Cache",
"4": "Virtual Cache",
"1858": "Wherigo Cache",
"10Years_32": "Lost and Found Event Cache",
"ape_32": "Project Ape Cache",
"HQ_32": "Groundspeak HQ",
"1304": "GPS Adventures Exhibit",
"4738": "Groundspeak Block Party",
"12": "Locationless (Reverse) Cache",
("2", ): ("Traditional", ),
("3", ): ("Multi-cache", ),
("8", ): ("Mystery", "Unknown", ),
("5", ): ("Letterbox hybrid", ),
("6", ): ("Event", ),
("mega", ): ("Mega-Event", ),
("giga", ): ("Giga-Event", ),
("137", "earthcache", ): ("Earthcache", ),
("13", ): ("Cache in Trash out Event", "CITO", ),
("11", ): ("Webcam", ),
("4", ): ("Virtual", ),
("1858", ): ("Wherigo", ),
("10Years_32", ): ("Lost and Found Event", ),
("ape_32", ): ("Project Ape", ),
("HQ_32", ): ("Groundspeak HQ", ),
("1304", ): ("GPS Adventures Exhibit", ),
("4738", ): ("Groundspeak Block Party", ),
("12", ): ("Locationless (Reverse)", ),
}

_possible_sizes = {
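As an aside, a standalone illustration (not part of this commit) of what the tuple-based mapping above allows: every icon key and display-name synonym can be flattened into a single case-insensitive lookup table pointing at the canonical first name.

    # Trimmed copy of the mapping above, for illustration only.
    _possible_types = {
        ("2",): ("Traditional",),
        ("8",): ("Mystery", "Unknown"),
        ("137", "earthcache"): ("Earthcache",),
    }

    # Flatten icon keys and display-name synonyms to the canonical first name.
    canonical = {}
    for keys, names in _possible_types.items():
        for synonym in keys + names:
            canonical[synonym.lower()] = names[0]

    assert canonical["unknown"] == "Mystery"
    assert canonical["earthcache"] == "Earthcache"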
@@ -225,14 +224,30 @@ def cache_type(self):

@cache_type.setter
def cache_type(self, cache_type):
cache_type = cache_type.replace(" Geocache", "") # with space!
cache_type = cache_type.replace(" Cache", "") # with space!
cache_type = cache_type.strip()
cache_type = cache_type.replace("Geocache", "Cache")
if cache_type in self._possible_types.values(): # try to search in values
self._cache_type = cache_type
elif cache_type in self._possible_types.keys(): # not in values => it must be a key
self._cache_type = self._possible_types[cache_type]
else:
raise ValueError("Cache type '{}' is not possible.".format(cache_type))

# walk through each type and its synonyms
for key, value in self._possible_types.items():
for synonym in value:
if cache_type.lower() == synonym.lower():
self._cache_type = self._possible_types[key][0]
return

raise ValueError("Cache type '{}' is not possible.".format(cache_type))

@classmethod
def get_cache_type_by_img(cls, src):
"""Returns cache type by its image src"""
# parse src (http://www.geocaching.com/images/WptTypes/[KEY].gif)
img_name = src.split("/")[-1].rsplit(".", 1)[0]

# walk through each key and its synonyms
for key in cls._possible_types.keys():
for synonym in key:
if img_name == synonym:
return cls._possible_types[key][0]

@property
@lazy_loaded
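A short sketch of the two lookups defined above (assumes the package from this commit is importable and that Geocaching() can be instantiated without arguments, as in the tests; the waypoint is hypothetical):

    from pycaching.cache import Cache
    from pycaching.geocaching import Geocaching

    # classmethod: resolve the canonical type from the icon filename in an img src
    print(Cache.get_cache_type_by_img(
        "http://www.geocaching.com/images/WptTypes/2.gif"))  # "Traditional"

    # setter: any synonym is accepted case-insensitively, the canonical name is stored
    c = Cache("GC12345", Geocaching())
    c.cache_type = "unknown"
    print(c.cache_type)  # "Mystery"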
@@ -311,7 +326,7 @@ def hidden(self, hidden):
if type(hidden) is str:
hidden = Util.parse_date(hidden)
elif type(hidden) is not datetime.date:
raise ValueError("Passed object is not datetime.date instance nor string containing date.")
raise ValueError("Passed object is not datetime.date instance nor string containing a date.")
self._hidden = hidden

@property
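The hidden setter above accepts either a date string or a datetime.date; a small sketch mirroring the tests (same assumptions as above, waypoint hypothetical):

    import datetime
    from pycaching.cache import Cache
    from pycaching.geocaching import Geocaching

    c = Cache("GC12345", Geocaching())
    c.hidden = "1/30/2000"  # strings are parsed via Util.parse_date
    assert c.hidden == datetime.date(2000, 1, 30)

    try:
        c.hidden = None  # neither str nor datetime.date
    except ValueError:
        print("rejected as expected")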
154 changes: 67 additions & 87 deletions pycaching/geocaching.py
@@ -3,9 +3,9 @@
import logging
import math
import requests
import bs4
import mechanicalsoup as ms
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from pycaching.area import Area
from pycaching.cache import Cache
from pycaching.util import Util
@@ -35,7 +35,8 @@ class Geocaching(object):
_urls = {
"login_page": _baseurl + "login/default.aspx",
"cache_details": _baseurl + "geocache/{wp}",
"caches_nearest": _baseurl + "seek/nearest.aspx",
"search": _baseurl + "play/search",
"search_more": _baseurl + "play/search/more-results",
"geocode": _baseurl + "api/geocode",
"map": _tile_url + "map.details",
"tile": _tile_url + "map.png",
@@ -132,105 +133,84 @@ def search(self, point, limit=0):

logging.info("Searching at %s...", point)

page_num = 1
cache_num = 0
start_index = 0
while True:
try: # try to load search page
page = self._search_get_page(point, page_num)
except requests.exceptions.ConnectionError as e:
raise StopIteration("Cannot load search page.") from e

for cache in page:
yield cache

cache_num += 1
if limit > 0 and cache_num >= limit:
raise StopIteration()

page_num += 1

@login_needed
def _search_get_page(self, point, page_num):
"""Returns one page of caches as a list.
# get one page
page = self._search_get_page(point, start_index)

Searches for a caches around a point and returns N-th page (specifiend by page argument)."""

assert isinstance(point, Point)
assert type(page_num) is int

logging.info("Fetching page %d.", page_num)

# assemble request
params = urlencode({"lat": point.latitude, "lng": point.longitude})
url = self._urls["caches_nearest"] + "?" + params

# we have to add POST for other pages than 1st
if page_num == 1:
post = None
else:
# TODO handle searching on second page without first
post = self._pagging_helpers
post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
post["__EVENTARGUMENT"] = ""
if not page:
# result is empty - no more caches
raise StopIteration()

# make request
try:
root = self._browser.post(url, post).soup
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search page #{}.".format(page_num)) from e
# parse caches in result
for start_index, row in enumerate(BeautifulSoup(page).find_all("tr"), start_index):

# root of a few following elements
widget_general = root.find_all("td", "PageBuilderWidget")
if limit > 0 and start_index == limit:
raise StopIteration()

# parse pagging widget
caches_total, page_num, page_count = [int(elm.text) for elm in widget_general[0].find_all("b")]
logging.debug("Found %d results. Showing page %d of %d.", caches_total, page_num, page_count)
# parse raw data
cache_details = row.find("span", "cache-details").text.split("|")
wp = cache_details[1].strip()

# create and fill cache object
c = Cache(wp, self)
c.cache_type = cache_details[0].strip()
c.name = row.find("span", "cache-name").text
c.found = row.find("img", title="Found It!") is not None
c.favorites = int(row.find(attrs={"data-column": "FavoritePoint"}).text)
c.state = not (row.get("class") and "disabled" in row.get("class"))
c.pm_only = row.find("td", "pm-upsell") is not None

if c.pm_only:
# PM-only caches don't have the other attributes filled in
yield c
continue

# save search postbacks for future usage
if page_num == 1:
pagging_links = [_ for _ in widget_general[1].find_all("a") if _.get("id")]
self._pagging_postbacks = {int(link.text): link.get("href").split("'")[1] for link in pagging_links}
c.size = row.find(attrs={"data-column": "ContainerSize"}).text
c.difficulty = float(row.find(attrs={"data-column": "Difficulty"}).text)
c.terrain = float(row.find(attrs={"data-column": "Terrain"}).text)
c.hidden = Util.parse_date(row.find(attrs={"data-column": "PlaceDate"}).text)
c.author = row.find("span", "owner").text[3:] # delete "by "

# other nescessary fields
self._pagging_helpers = {field["name"]: field["value"] for field in root.find_all("input", type="hidden")}
logging.debug("Cache parsed: %s", c)
yield c

# parse results table
data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
return [self._search_parse_cache(c) for c in data]
start_index += 1

@login_needed
def _search_parse_cache(self, root):
"""Returns a Cache object parsed from BeautifulSoup Tag."""
def _search_get_page(self, point, start_index):

assert isinstance(root, bs4.Tag)
logging.debug("Loading page from start_index: %d", start_index)

# parse raw data
favorites = root.find("span", "favorite-rank")
typeLink, nameLink = root.find_all("a", "lnk")
pm_only = root.find("img", title="Premium Member Only Cache") is not None
direction, info, D_T, placed, last_found = root.find_all("span", "small")
found = root.find("img", title="Found It!") is not None
size = root.find("td", "AlignCenter").find("img")
author, wp, area = [t.strip() for t in info.text.split("|")]
if start_index == 0:
# first request has to load normal search page
logging.debug("Using normal search endpoint")

# create cache object
c = Cache(wp, self)
params = urlencode({"origin": point.format(None, "", "", "")})
url = self._urls["search"] + "?" + params

# prettify data
c.cache_type = typeLink.find("img").get(
"src").split("/")[-1].rsplit(".", 1)[0] # filename of img[src]
c.name = nameLink.span.text.strip()
c.found = found
c.state = "Strike" not in nameLink.get("class")
c.size = size.get("src").split("/")[-1].rsplit(".", 1)[0] # filename of img[src]
c.difficulty, c.terrain = list(map(float, D_T.text.split("/")))
c.hidden = Util.parse_date(placed.text)
c.author = author[3:] # delete "by "
c.favorites = int(favorites.text)
c.pm_only = pm_only
# make request
try:
return str(self._browser.get(url).soup.find(id="geocaches"))
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search results.") from e

logging.debug("Cache parsed: %s", c)
return c
else:
# other requests can use AJAX endpoint
logging.debug("Using AJAX search endpoint")

params = urlencode({
"inputOrigin": point.format(None, "", "", ""),
"startIndex": start_index,
"originTreatment": 0
})
url = self._urls["search_more"] + "?" + params

# make request
try:
return self._browser.get(url).json()["HtmlString"].strip()
except requests.exceptions.ConnectionError as e:
raise Error("Cannot load search results.") from e

def search_quick(self, area, precision=None, strict=False):
"""Get geocaches inside area, with approximate coordinates
@@ -483,7 +463,7 @@ def load_cache(self, wp, destination=None):

# prettify data
c.name = name.text
c.cache_type = cache_type.split("/")[-1].rsplit(".", 1)[0]
c.cache_type = Cache.get_cache_type_by_img(cache_type)
c.author = author.text
c.hidden = Util.parse_date(hidden.text.split(":")[-1])
c.location = Point.from_string(location.text)
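A sketch of consuming the rewritten streaming search, including the premium-member-only handling added above (assumes the package from this commit; the login() call and the Point import path are assumptions):

    from pycaching.geocaching import Geocaching
    from pycaching.point import Point  # assumed import path

    g = Geocaching()
    g.login("username", "password")  # assumption

    for c in g.search(Point(49.74, 13.38), limit=25):
        if c.pm_only:
            # premium-only rows only carry type, name, found, favorites and state
            print(c.wp, c.name, "(premium members only)")
            continue
        print(c.wp, c.name, c.cache_type, c.size, c.difficulty, c.terrain, c.favorites)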
4 changes: 2 additions & 2 deletions setup.py
@@ -9,7 +9,7 @@

info = {
"name": "pycaching",
"version": "3.1", # PEP 386
"version": "3.1.1", # PEP 386
"author": "Tomas Bedrich",
"author_email": "ja@tbedrich.cz",
"url": "https://github.com/tomasbedrich/pycaching",
@@ -19,7 +19,7 @@
"description": "Geocaching.com site crawler. Provides tools for searching, fetching caches and geocoding.",
"long_description": long_description,
"keywords": ["geocaching", "crawler", "geocache", "cache", "searching", "geocoding"],
"install_requires": ["MechanicalSoup >= 0.2.0", "geopy >= 1.0.0"],
"install_requires": ["MechanicalSoup >= 0.3.0", "geopy >= 1.0.0"],
"test_suite": "test"
}

22 changes: 19 additions & 3 deletions test/test_cache.py
@@ -23,6 +23,10 @@ def test___str__(self):
def test___eq__(self):
self.assertEqual(self.c, Cache("GC12345", self.gc))

def test_geocaching(self):
with self.assertRaises(ValueError):
Cache("GC12345", None)

def test_wp(self):
self.assertEqual(self.c.wp, "GC12345")

@@ -34,7 +38,7 @@ def test_name(self):
self.assertEqual(self.c.name, "Testing")

def test_type(self):
self.assertEqual(self.c.cache_type, "Traditional Cache")
self.assertEqual(self.c.cache_type, "Traditional")

with self.subTest("filter invalid"):
with self.assertRaises(ValueError):
@@ -47,10 +51,14 @@ def test_location(self):
self.c.location = "S 36 51.918 E 174 46.725"
self.assertEqual(self.c.location, Point.from_string("S 36 51.918 E 174 46.725"))

with self.subTest("filter invalid"):
with self.subTest("filter invalid string"):
with self.assertRaises(ValueError):
self.c.location = "somewhere"

with self.subTest("filter invalid types"):
with self.assertRaises(ValueError):
self.c.location = None

def test_state(self):
self.assertEqual(self.c.state, True)

@@ -88,17 +96,25 @@ def test_hidden(self):
self.c.hidden = "1/30/2000"
self.assertEqual(self.c.hidden, date(2000, 1, 30))

with self.subTest("filter invalid"):
with self.subTest("filter invalid string"):
with self.assertRaises(ValueError):
self.c.hidden = "now"

with self.subTest("filter invalid types"):
with self.assertRaises(ValueError):
self.c.hidden = None

def test_attributes(self):
self.assertEqual(self.c.attributes, {"onehour": True, "kids": False, "available": True})

with self.subTest("filter unknown"):
self.c.attributes = {attr: True for attr in ["onehour", "xxx"]}
self.assertEqual(self.c.attributes, {"onehour": True})

with self.subTest("filter invalid"):
with self.assertRaises(ValueError):
self.c.attributes = None

def test_summary(self):
self.assertEqual(self.c.summary, "text")

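Since setup.py declares test_suite = "test", one way to run the updated suite locally (Python >= 3.4 per the README, executed from the repository root) is programmatically via unittest, equivalent to python setup.py test:

    import unittest

    suite = unittest.defaultTestLoader.discover("test")
    unittest.TextTestRunner(verbosity=2).run(suite)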