forked from edsu/wikistream
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstream.py
executable file
·66 lines (53 loc) · 1.95 KB
/
stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
"""
In this module you'll find a function wikipedia_updates which takes a callback
function which will be passed wikipedia updates that stream from the socket.io
server at http://wikistream.inkdroid.org
Each update passed to your callback function will be a Python dictionary
that looks something like:
{
'anonymous': False,
'comment': '/* Anatomy */ changed statement that orbit was the eye to saying
that the orbit was the eye socket for accuracy',
'delta': 7,
'flag': '',
'namespace': 'article',
'newPage': False,
'page': 'Optic nerve',
'pageUrl': 'http://en.wikipedia.org/wiki/Optic_nerve',
'robot': False,
'unpatrolled': False,
'url': 'http://en.wikipedia.org/w/index.php?diff=449570600&oldid=447889877',
'user': 'Moearly',
'userUrl': 'http://en.wikipedia.org/wiki/User:Moearly',
'wikipedia': '#en.wikipedia',
'wikipediaLong': 'English Wikipedia',
'wikipediaShort': 'en',
'wikipediaUrl': 'http://en.wikipedia.org'
}
You'll need the requests (http://pypi.python.org/pypi/requests) library installed
for the HTTP requests.
More about the protocol that socket.io uses can be found at:
https://github.com/learnboost/socket.io-spec
"""
import re
import json
import time
from requests import post, get
def wikipedia_updates(callback,
                      endpoint="http://wikistream.inkdroid.org/socket.io/1"):
    """Poll a socket.io server forever and hand each decoded update to callback.

    callback -- callable invoked with one argument: the object obtained by
                JSON-decoding the payload of each received frame.
    endpoint -- base socket.io URL to connect to. Defaults to the public
                wikistream server; pass e.g.
                "http://localhost:3000/socket.io/1" to use a local server.

    This function never returns normally: it keeps issuing xhr-polling
    requests in an endless loop. Errors raised by the HTTP calls, by JSON
    decoding, or by callback itself propagate to the caller.
    """
    # Handshake: the text before the first ':' in the response body is used
    # as the session id. Fail loudly on an HTTP error instead of treating an
    # error page as a session id.
    handshake = post(endpoint)
    handshake.raise_for_status()
    # Decode explicitly so the split works on Python 3, where .content is
    # bytes and cannot be split on a str separator.
    session_id = handshake.content.decode('utf-8').split(':')[0]
    xhr_endpoint = "/".join((endpoint, "xhr-polling", session_id))

    # A response may carry several frames, each introduced by
    # \ufffd<digits>\ufffd. Compile the delimiter once, outside the loop.
    frame_delimiter = re.compile(u'\ufffd[0-9]+\ufffd')

    while True:
        # Timestamp query parameter; presumably a cache-buster so every poll
        # is treated as a fresh request -- confirm against the server.
        t = time.time() * 1000000
        response = get(xhr_endpoint, params={'t': t}).content.decode('utf-8')
        for chunk in frame_delimiter.split(response):
            # Split into at most four ':'-separated fields; only frames that
            # have a fourth field carry a payload, which is JSON-decoded.
            parts = chunk.split(':', 3)
            if len(parts) == 4:
                callback(json.loads(parts[3]))
if __name__ == "__main__":
    def print_page(update):
        """Print one update to standard output."""
        # Call form of print: a single parenthesized argument prints the same
        # on Python 2 and is valid syntax on Python 3, unlike `print update`.
        print(update)

    # Stream updates to the console until interrupted.
    wikipedia_updates(print_page)