Browse Source

Scraping of Spore achievements

master
Chris Smith 14 years ago
parent
commit
c18830c4ae
3 changed files with 2038 additions and 0 deletions
  1. 2012
    0
      src/BeautifulSoup.py
  2. 23
    0
      src/Scraper.py
  3. 3
    0
      src/helloworld.py

+ 2012
- 0
src/BeautifulSoup.py
File diff suppressed because it is too large
View File


+ 23
- 0
src/Scraper.py View File

@@ -0,0 +1,23 @@
1
+from BeautifulSoup import BeautifulSoup
2
+import urllib2
3
+
4
class scraper:
    """Scrapes achievement listings from spore.com achievement pages."""

    def scrape_spore(self, credentials):
        """Return the achievements listed on a Spore achievements page.

        Args:
            credentials: value substituted into the achievements URL
                (for example a user name such as "csmith87").

        Returns:
            A list of dicts, one per achievement table found on the page,
            each with the keys 'img' (absolute image URL), 'title', 'desc'
            and 'date'. The list is empty when the page cannot be fetched
            or when the achievements section is missing from the markup.
        """
        # Imported locally so the block stays self-contained and does not
        # depend on a module-level import being present.
        import logging

        results = []
        url = "http://www.spore.com/view/achievements/%s" % credentials

        # Keep only the network access inside the try block so that parsing
        # problems are not mistaken for fetch failures.
        try:
            response = urllib2.urlopen(url)
            try:
                page = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except urllib2.URLError:
            # No handleError helper is defined in this module, so calling it
            # would raise NameError; log the failure and return no results.
            logging.exception("Could not fetch Spore achievements from %s", url)
            return results

        soup = BeautifulSoup(page)

        # find()/findNextSibling() return None when nothing matches; guard
        # each step instead of chaining so a changed layout yields an empty
        # list rather than an AttributeError.
        heading = soup.find('h2', 'achievementsH2')
        if heading is None:
            return results
        achdiv = heading.findNextSibling('div', 'fields')
        if achdiv is None:
            return results

        for ach in achdiv.findAll('table'):
            # The page uses site-relative image paths; make them absolute.
            img = "http://www.spore.com%s" % ach.find('img')['src']
            title = ach.find('b').string.strip()
            # Only the first child of the description div is used as text.
            desc = ach.find('div', 'achievementDesc').contents[0].strip()
            date = ach.find('span').string.strip()
            results.append({'img': img, 'title': title, 'desc': desc, 'date': date})

        return results

+ 3
- 0
src/helloworld.py View File

@@ -1,4 +1,5 @@
1 1
 import cgi
2
+from Scraper import scraper
2 3
 
3 4
 from google.appengine.ext import webapp
4 5
 from google.appengine.api import users
@@ -45,6 +46,8 @@ class MainPage(webapp.RequestHandler):
45 46
 
46 47
         self.show_footer()
47 48
 
49
+        self.response.out.write(scraper().scrape_spore("csmith87"))
50
+
48 51
     def show_header(self):
49 52
         self.response.out.write("""
50 53
           <html>

Loading…
Cancel
Save