-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfacebook-bitly.py
89 lines (70 loc) · 3.81 KB
/
facebook-bitly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import urllib.request
import json
import re
import itertools
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from tabledef import *
# import os
FACEBOOK_KEY = "REPLACE_ME" # replace this with your access key, for example: FACEBOOK_KEY = "1234567890|sdubgfownudgwer28ru8nc"
# FACEBOOK_KEY = os.environ.get('FACEBOOK_KEY')
url = "https://graph.facebook.com/v2.2/546443732093598/posts?access_token=" + FACEBOOK_KEY + "&limit=100"
try:
r = urllib.request.urlopen(url)
except urllib.error.HTTPError:
# raise
print("Error retrieving page. Quitting Python...")
quit()
data = json.loads(r.read().decode(r.info().get_param('charset') or 'utf-8'))
try:
print("page 0 exists.")
print(url, "\n")
print(data["paging"]["next"], "\n")
except (AttributeError, KeyError):
print("No data found.")
quit()
# Initialise Database
engine = create_engine('sqlite:///courses.db')
Session = sessionmaker(bind=engine)
session = Session()
for page_count in itertools.count(1): # count(start, [step]) -> https://docs.python.org/3/library/itertools.html
try:
url = "https://graph.facebook.com/v2.2/546443732093598/posts?access_token=" + FACEBOOK_KEY + "&limit=100" + "&offset=" + str(page_count) + "00"
r = urllib.request.urlopen(url)
data = json.loads(r.read().decode(r.info().get_param('charset') or 'utf-8'))
## START STEP 2 ##
########################################################################################
for key in data["data"]:
try:
bitly_url = re.search("(?P<url>https?://bit.ly[^\s]+)", key["message"]).group("url")
post_date = key["created_time"]
print(bitly_url, "\n", post_date)
# if bitly_url and post_date exist, then pass (for the first time, thereafter quit <- we are NOT doing this in this workshop)
# else if bitly_url exist but post_date does not exist, do not add and pass
# else, add entry into db.
if session.query(Course).filter(Course.bitly_url == bitly_url).filter(Course.post_date == post_date).first() is not None:
# uncomment below if this is the first time scrapping this Facebook page
print("bit.ly url and post date duplicate found @", session.query(Course).filter(Course.bitly_url == bitly_url).filter(Course.post_date == post_date).first().post_date ,"so skipping to next...")
pass
# uncomment below if you have scrapped all of this Facebook page
# print("bit.ly url and post date duplicate found. quitting python...")
# quit()
elif session.query(Course).filter(Course.bitly_url == bitly_url).first() is not None:
print("bit.ly url already added @", session.query(Course).filter(Course.bitly_url == bitly_url).first().post_date ,". skipping to next...")
pass
else:
course = Course(bitly_url=bitly_url, post_date=post_date, status="bit.ly url found", expanded_url=None, udemy_url=None, checkout_url=None, course_name=None, discounted_price=None, original_price=None, date_last_checked=None, remarks=None)
session.add(course)
session.commit()
print("bit.ly url added!")
except AttributeError:
pass
########################################################################################
## END STEP 2 ##
print("page", str(page_count), "exists.")
print(url, "\n")
print(data["paging"]["next"], "\n")
except KeyError:
print("No additional data found.")
session.close()
quit()