Skip to content

Commit

Permalink
Get rid of some more warnings (Charcoal-SE#43). --autopull
Browse files Browse the repository at this point in the history
Also change sys.exit back to os._exit. sys.exit works by raising a
SystemExit exception, which can be caught (and, in a non-main thread, only
ends that thread), so it might cause a problem for our auto-reverter.
  • Loading branch information
thomas-daniels committed Jan 25, 2015
1 parent d9d0777 commit 127d2f7
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 27 deletions.
24 changes: 18 additions & 6 deletions chatcommunicate.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import random
import requests
import time
from parsing import *
from datahandling import *
from bayesianfuncs import *
from parsing import fetch_post_id_and_site_from_msg_content,\
get_user_from_url, fetch_owner_url_from_msg_content,\
fetch_title_from_msg_content
from datahandling import add_false_positive, is_privileged,\
add_whitelisted_user, add_blacklisted_user, add_ignored_post
from bayesianfuncs import bayesian_learn_title
from globalvars import GlobalVars
import os
import re
from datetime import datetime


def post_message_in_room(room_id_str, msg):
Expand Down Expand Up @@ -63,7 +69,7 @@ def watcher(ev, wrap2):
if should_delete:
msg_to_delete.delete()
except:
pass # couldn't delete message
pass # couldn't delete message
if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
and is_privileged(ev_room, ev_user_id):
try:
Expand Down Expand Up @@ -117,7 +123,7 @@ def watcher(ev, wrap2):
if str(msg_to_delete.owner.id) == GlobalVars.smokeDetector_user_id[ev_room]:
msg_to_delete.delete()
except:
pass # couldn't delete message
pass # couldn't delete message
if content_lower.startswith("!!/wut"):
ev.message.reply("Whaddya mean, 'wut'? Humans...")
if content_lower.startswith("!!/lick"):
Expand Down Expand Up @@ -146,7 +152,13 @@ def watcher(ev, wrap2):
'Watching this endless list of new questions *never* gets boring',
'Kinda sorta']))
if content_lower.startswith("!!/rev"):
ev.message.reply('[' + GlobalVars.commit_with_author + '](https://github.com/Charcoal-SE/SmokeDetector/commit/'+ GlobalVars.commit +')')
ev.message.reply(
'[' +
GlobalVars.commit_with_author +
'](https://github.com/Charcoal-SE/SmokeDetector/commit/' +
GlobalVars.commit +
')'
)
if content_lower.startswith("!!/status"):
ev.message.reply('Running since %s UTC' % GlobalVars.startup_utc)
if content_lower.startswith("!!/reboot"):
Expand Down
15 changes: 9 additions & 6 deletions continuousintegration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import os
from globalvars import GlobalVars
import datetime
import json
import time


def watch_ci():
HOST = ''
PORT = 49494

s=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
print "CI Socket Created"

try:
Expand All @@ -25,14 +25,17 @@ def watch_ci():

while 1:
conn, addr = s.accept()

addr_host = socket.gethostbyaddr(addr[0])[0]
is_circleci = True if re.compile(r"ec2-\d{1,3}-\d{1,3}-\d{1,3}-\d{1,3}.compute-1.amazonaws.com").search(addr_host) else False
print 'Received request from ' + addr[0] + " ; " + "verified as CircleCI" if is_circleci else "NOT verified as CircleCI!"
if not is_circleci:
conn.close()
continue
conn.send('HTTP/1.1 200 OK\nContent-Type: text/plain\nContent-Length: 2\n\nOK\n')
conn.send('HTTP/1.1 200 OK\n' +
'Content-Type: text/plain\n'
'Content-Length: 2\n' +
'\nOK\n')
conn.close()
r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
latest_sha = r.json()["object"]["sha"]
Expand All @@ -41,7 +44,7 @@ def watch_ci():
state = status["state"]
target_url = status["target_url"]
if state == "success":
if datetime.datetime.strptime(status["updated_at"], '%Y-%m-%dT%H:%M:%SZ') > datetime.datetime.now()-datetime.timedelta(seconds=10):
if datetime.datetime.strptime(status["updated_at"], '%Y-%m-%dT%H:%M:%SZ') > datetime.datetime.now() - datetime.timedelta(seconds=10):

r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/' + latest_sha)
commit_message = r.json()["commit"]["message"]
Expand All @@ -56,7 +59,7 @@ def watch_ci():

continue
elif state == "error" or state == "failure":
if datetime.datetime.strptime(status["updated_at"], '%Y-%m-%dT%H:%M:%SZ') > datetime.datetime.now()-datetime.timedelta(seconds=10):
if datetime.datetime.strptime(status["updated_at"], '%Y-%m-%dT%H:%M:%SZ') > datetime.datetime.now() - datetime.timedelta(seconds=10):
GlobalVars.charcoal_hq.send_message("[CI build failed](%s), *someone* (prolly Undo) borked something!" % target_url)
continue
s.close()
23 changes: 12 additions & 11 deletions findspam.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import re
import phonenumbers


class FindSpam:
rules = [
{'regex': u"(?i)\\b(baba(ji)?|fifa.*coins?|nike|tosterone|bajotz|vashi?k[ae]r[ae]n|sumer|kolcak|porn|molvi|judi bola|ituBola.com|lost lover|11s|acai|skin care|rejuvenated skin|LifeForce|swtor2credits|me2.do|black magic|bam2u|Neuro(3X|flexyn)|Nutra|TesteroneXL|Bowtrol|Slim ?Genix|Cleanse EFX|Babyliss ?Pro|Forskolin|Blackline Elite|TestCore Pro|Xtreme Antler|Maxx Test 3000|Cheap Wigs?|jivam|(Improve )?Brain Power|aging skin|acne( prone)? skin|(skin )?eye serum|skin (serum|eye)|(fake|original) (passports?|driver'?s? licen[cs]e|ID cards?)|bagprada)\\b|ಌ|(support|service|helpline)( phone)? number|1[ -]?866[ -]?978[ -]?6819|>>>>(?s).*http", 'all': True,
'sites': [], 'reason': "Bad keyword in {}", 'title': True, 'body': True, 'username': True},
'sites': [], 'reason': "Bad keyword in {}", 'title': True, 'body': True, 'username': True},
{'regex': u"(?i)\\b(fifabay)\\b", 'all': True, 'reason': "Bad keyword in {}", 'sites': [], 'title': True, 'body': True, 'username': True},
{'regex': u"(?i)\\b(weight (body ?builder|loo?s[es]|reduction)|muscles? build(ing)?|muscles?( (grow(th)?|diets?))?|anti aging|SkinCentric|loo?s[es] weight|wrinkles?)\\b", 'all': True,
'sites': ["fitness.stackexchange.com"], 'reason': "Bad keyword in {}", 'title': True, 'body': False, 'username': True},
'sites': ["fitness.stackexchange.com"], 'reason': "Bad keyword in {}", 'title': True, 'body': False, 'username': True},
{'regex': u"(?i)^(?:(?=.*?\\b(?:online|hd)\\b)(?=.*?(?:free|full|unlimited)).*?movies?\\b|(?=.*?\\b(?:acai|kisn)\\b)(?=.*?care).*products?\\b|(?=.*?packer).*mover)", 'all': True,
'sites': [], 'reason': "Bad keywords in {}", 'title': True, 'body': False, 'username': True},
'sites': [], 'reason': "Bad keywords in {}", 'title': True, 'body': False, 'username': True},
{'regex': u"\\d(?:_*\\d){9}|\\+?\\d_*\\d[\\s\\-]?(?:_*\\d){8,10}|\\d[ -]?\\d{3}[ -]?\\d{3}[ -]?\\d{4}", 'all': True,
'sites': ["patents.stackexchange.com"], 'reason': "Phone number detected", 'validation_method': 'check_phone_numbers', 'title': True, 'body': False, 'username': False},
'sites': ["patents.stackexchange.com"], 'reason': "Phone number detected", 'validation_method': 'check_phone_numbers', 'title': True, 'body': False, 'username': False},
{'regex': u"(?i)\\b(nigg(a|er)|asshole|fag|fuck(ing?)?|shit|whore)s?\\b", 'all': True,
'sites': [], 'reason': "Offensive {} detected",'insensitive':True, 'title': True, 'body': True, 'username': False},
'sites': [], 'reason': "Offensive {} detected", 'insensitive':True, 'title': True, 'body': True, 'username': False},
{'regex': u"(?i)\\b(crap)\\b", 'all': True, 'sites': [], 'reason': "Offensive {} detected", 'insensitive': True, 'title': True, 'body': False, 'username': False},
{'regex': u"^(?=.*[A-Z])[^a-z]*$", 'all': True, 'sites': [], 'reason': "All-caps title", 'title': True, 'username': False},
{'regex': u"^(?=.*[0-9])[^a-zA-Z]*$", 'all': True, 'sites': [], 'reason': "Numbers-only title", 'title': True, 'body': False, 'username': False},
{'regex': u"https?://[a-zA-Z0-9_.-]+\\.[a-zA-Z]{2,4}(/[a-zA-Z0-9_/?=.-])?", 'all': True,
'sites': ["stackoverflow.com", "superuser.com", "askubuntu.com"], 'reason': "URL in title", 'title': True, 'body': False, 'username': False},
'sites': ["stackoverflow.com", "superuser.com", "askubuntu.com"], 'reason': "URL in title", 'title': True, 'body': False, 'username': False},
{'regex': u"(?i)(online ?kelas|wowtoes|ipubsoft|orabank|powerigfaustralia|cfpchampionship2015playofflive|optimalstackfacts|maletestosteronebooster|x4facts|tripleeffectseyeserum|healthcaresup|garciniacambogiaprofacts|filerepairforum|lxwpro-t|casque-beatsbydre|tenderpublish|elliskinantiaging|funmac|lovebiscuits)", 'sites': [], 'all': True, 'reason': "Blacklisted website", 'title': True, 'body': True, 'username': True},
{'regex': u"([a-zA-Z])\\1{10,}", 'all': True, 'sites': [], 'reason': "Repeating characters in {}", 'title': True, 'body': True, 'username': False}
]
Expand All @@ -38,26 +39,26 @@ def test_post(title, body, user_name, site, is_answer):
try:
if getattr(FindSpam, "%s" % rule['validation_method'])(matched_title):
result.append(rule['reason'])
except KeyError: # There is no special logic for this rule
except KeyError: # There is no special logic for this rule
result.append(rule['reason'].replace("{}", "title"))
if matched_username and rule['username']:
try:
if getattr(FindSpam, "%s" % rule['validation_method'])(matched_username):
result.append(rule['reason'])
except KeyError: # There is no special logic for this rule
except KeyError: # There is no special logic for this rule
result.append(rule['reason'].replace("{}", "username"))
if matched_body and rule['body']:
type_of_post = "answer" if is_answer else "body"
try:
if getattr(FindSpam, "%s" % rule['validation_method'])(matched_body):
result.append(rule['reason'].replace("{}", type_of_post))
except KeyError: # There is no special logic for this rule
except KeyError: # There is no special logic for this rule
result.append(rule['reason'].replace("{}", type_of_post))
return result

@staticmethod
def check_phone_numbers(matched):
test_formats = [ "IN", "US", None ]
test_formats = ["IN", "US", None]
for phone_number in matched:
for testf in test_formats:
try:
Expand Down
3 changes: 2 additions & 1 deletion parseuserresponse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys, json;
import sys
import json

array = json.load(sys.stdin)["items"]

Expand Down
1 change: 1 addition & 0 deletions parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,6 @@ def fetch_title_from_msg_content(content):
def unescape_title(title_escaped):
    """Unescape a title and trim its surrounding whitespace.

    The text is passed through ``GlobalVars.parser.unescape`` and the
    result is returned with leading and trailing whitespace stripped.
    """
    unescaped = GlobalVars.parser.unescape(title_escaped)
    return unescaped.strip()


def escape_special_chars_in_title(title_unescaped):
    """Backslash-escape the special characters in a title.

    Every underscore, asterisk, backslash, backtick and square bracket in
    *title_unescaped* is prefixed with a single backslash; all other
    characters are returned unchanged.
    """
    special_chars = re.compile(r"([_*\\`\[\]])")
    # The replacement template is a literal backslash followed by the
    # matched character itself.
    return special_chars.sub(r"\\\1", title_unescaped)
3 changes: 3 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
ignore = E501
exclude = ChatExchange/*,bayesian/*,.idea/*,.git/*
6 changes: 3 additions & 3 deletions ws.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@

def restart_automatically(time_in_seconds):
time.sleep(time_in_seconds)
sys.exit(1)
os._exit(1)

Thread(target=restart_automatically, args=(3600,)).start()

Expand Down Expand Up @@ -122,7 +122,7 @@ def restart_automatically(time_in_seconds):
with open("errorLogs.txt", "a") as f:
f.write(logged_msg)
if seconds < 180:
sys.exit(4)
os._exit(4)
ws = websocket.create_connection("ws://qa.sockets.stackexchange.com/")
ws.send("155-questions-active")
GlobalVars.charcoal_hq.send_message("Recovered from `" + exception_only + "`")
Expand All @@ -131,6 +131,6 @@ def restart_automatically(time_in_seconds):
delta = UtcDate.startup_utc_date - now
seconds = delta.total_seconds()
if seconds < 60:
sys.exit(4)
os._exit(4)
s = "[ [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector) ] SmokeDetector aborted"
GlobalVars.charcoal_hq.send_message(s)

0 comments on commit 127d2f7

Please sign in to comment.