An incomplete webapp that aggregates achievements/badges from various sources.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Scraper.py 2.2KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
from datetime import datetime
import urllib2

from BeautifulSoup import BeautifulSoup
  4. class Scraper:
  5. @staticmethod
  6. def scrape_spore(credentials):
  7. results = []
  8. url = "http://www.spore.com/view/achievements/%s" % credentials
  9. fmt = "%a %B %d, %Y"
  10. try:
  11. result = urllib2.urlopen(url).read()
  12. soup = BeautifulSoup(result)
  13. achdiv = soup.find('h2', 'achievementsH2').findNextSibling('div', 'fields')
  14. for ach in achdiv.findAll('table'):
  15. results.append({'img': "http://www.spore.com%s" % ach.find('img')['src'],
  16. 'title': ach.find('b').string.strip(),
  17. 'desc': ach.find('div', 'achievementDesc').contents[0].strip(),
  18. 'date': datetime.strptime(ach.find('span').string.strip(), fmt)})
  19. except urllib2.URLError, e:
  20. handleError(e)
  21. return results
  22. @staticmethod
  23. def scrape_steam(credentials):
  24. results = []
  25. prefix = "http://steamcommunity.com/id/%s/"
  26. url = "%sgames?xml=1" % (prefix % credentials)
  27. try:
  28. result = urllib2.urlopen(url).read()
  29. soup = BeautifulSoup(result)
  30. for globalLink in soup.findAll('globalstatslink'):
  31. game = globalLink.parent
  32. name = game.find('name').string.strip()
  33. url = prefix + game.find('statslink').string.strip()[len(prefix % credentials):]
  34. results.append({'name': name, 'url': url})
  35. except urllib2.URLError, e:
  36. handleError(e)
  37. return results
  38. @staticmethod
  39. def scrape_steam_game(credentials, base_url):
  40. results = []
  41. url = (base_url % credentials) + "?xml=1"
  42. try:
  43. result = urllib2.urlopen(url).read()
  44. soup = BeautifulSoup(result)
  45. for ach in soup.findAll('achievement', {'closed': '1'}):
  46. results.append({'img': ach.find('iconclosed').string.strip(),
  47. 'title': ach.find('name').string.strip(),
  48. 'desc': ach.find('description').string.strip(),
  49. 'date': None})
  50. except urllib2.URLError, e:
  51. handleError(e)
  52. return results