瀏覽代碼

Scraper for steam games page

master
Chris Smith 14 年之前
父節點
當前提交
1bf9bef659
共有 1 個檔案被更改，包括 21 行新增、0 行刪除
  1. 21
    0
      src/Scraper.py

+ 21
- 0
src/Scraper.py 查看文件

@@ -24,3 +24,24 @@ class Scraper:
24 24
           handleError(e)
25 25
 
26 26
         return results
27
+
28
+    @staticmethod
29
+    def scrape_steam(credentials):
30
+        results = []
31
+        prefix = "http://steamcommunity.com/id/%s/"
32
+        url = "%sgames?xml=1" % (prefix % credentials)
33
+
34
+        try:
35
+          result = urllib2.urlopen(url).read()
36
+          soup = BeautifulSoup(result)
37
+
38
+          for globalLink in soup.findAll('globalstatslink'):
39
+            game = globalLink.parent
40
+            name = game.find('name').string.strip()
41
+            url = prefix + game.find('statslink').string.strip()[len(prefix % credentials):]
42
+            results.append({'name': name, 'url': url})
43
+
44
+        except urllib2.URLError, e:
45
+          handleError(e)
46
+
47
+        return results

Loading…
取消
儲存