-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
111 lines (83 loc) · 2.87 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from datetime import datetime
from dateutil import tz
from os import getenv
from wikia_common_kibana import Kibana
import logging
import re
# Configure root logging at INFO level as soon as this module is imported
# (basicConfig does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)
class ElecenaKibana(Kibana):
    """Kibana subclass that names indices as ``<prefix>_<YYYY-MM-DD>``."""

    @staticmethod
    def format_index(prefix, timestamp, sep='-'):
        """
        Return the index name for the UTC day that contains ``timestamp``.

        NOTE(review): ``sep`` is accepted but never used here - the prefix and
        the date are always joined with an underscore. Presumably the argument
        exists to match the parent class signature; confirm before relying on it.

        :type prefix str
        :type timestamp int
        :type sep str
        :rtype: str
        """
        # e.g. syslog-ng_2018-05-05
        day = datetime.fromtimestamp(timestamp, tz=tz.tzutc()).strftime('%Y-%m-%d')
        return "{prefix}_{date}".format(prefix=prefix, date=day)
def get_kibana(period=3600):
    """
    Build an ElecenaKibana instance for the ``syslog-ng`` indices.

    The Elasticsearch host is taken from the ``ES_HOST`` environment variable
    (``None`` is passed through when the variable is not set).

    :param period: forwarded unchanged to ElecenaKibana
        (presumably the query time window in seconds - TODO confirm)
    :rtype: ElecenaKibana
    """
    es_host = getenv('ES_HOST')
    return ElecenaKibana(
        es_host=es_host,
        index_prefix='syslog-ng',
        index_sep='_',
        period=period
    )
# borrowed from https://github.com/macbre/query-digest/blob/master/digest/helpers.py
def normalize_likes(sql):
    """
    Normalize and wrap LIKE statements

    Every ``LIKE '<pattern>'`` becomes ``LIKE X`` (percent signs are dropped
    from the whole query first), and a run of identical
    ``or|and <column> LIKE X`` clauses is collapsed into a single clause
    followed by ``...``.

    :type sql str
    :rtype str
    """
    sql = sql.replace('%', '')

    # LIKE '%bot'
    sql = re.sub(r"LIKE '[^\']+'", 'LIKE X', sql)

    # or all_groups LIKE X or all_groups LIKE X
    clauses = {
        found.group(0)
        for found in re.finditer(r'(or|and) [^\s]+ LIKE X', sql, flags=re.IGNORECASE)
    }

    for clause in clauses:
        collapsed = ' ' + clause + ' ...'
        # Use a callable replacement so the clause text is inserted literally:
        # a plain string would be parsed as a template and a backslash inside
        # the column token (e.g. "b\d") would raise "bad escape".
        sql = re.sub(
            r'(\s?' + re.escape(clause) + ')+',
            lambda _match, text=collapsed: text,
            sql)

    return sql
def remove_comments_from_sql(sql):
    """
    Removes comments from SQL query

    Each ``/* ... */`` block (plus one whitespace character directly before
    it, if any) is removed on its own, so SQL located between two comments is
    kept. Comments may be empty or span several lines.

    :type sql str|None
    :rtype str|None
    """
    if sql is None:
        return None

    # Non-greedy body: stop at the first closing "*/" instead of the last one.
    # DOTALL lets a comment body contain newlines.
    return re.sub(r'\s?/\*.*?\*/', '', sql, flags=re.DOTALL)
def generalize_sql(sql):
    """
    Reduce an SQL query to a generic "shape": string literals become X,
    numbers become N, value lists are collapsed and everything following
    WHERE is dropped.

    Based on Mediawiki's DatabaseBase::generalizeSQL

    :type sql str|None
    :rtype str (None is returned unchanged)
    """
    if sql is None:
        return None

    # squash runs of two or more whitespace characters into one space
    sql = re.sub(r'\s{2,}', ' ', sql)

    # strip MW comments
    # e.g. /* CategoryDataService::getMostVisited N.N.N.N */
    sql = remove_comments_from_sql(sql)

    sql = sql.replace('SNIPPETS(', 'SNIPPETS (')

    # UPDATE and CALL queries are reduced to their first two words
    words = sql.split(' ')
    if words[0].upper() in ('UPDATE', 'CALL'):
        sql = ' '.join(words[:2])

    # handle LIKE statements
    sql = normalize_likes(sql)

    # (pattern, replacement, flags) - applied strictly in this order
    rewrites = (
        # drop escaped backslashes and escaped quotes
        (r"\\\\", '', 0),
        (r"\\'", '', 0),
        (r'\\"', '', 0),
        # quoted strings => X
        (r"'[^\']*'", 'X', 0),
        (r'"[^\"]*"', 'X', 0),
        # all newlines, tabs, etc => single space
        (r'\s+', ' ', 0),
        # all numbers => N
        (r'-?[0-9]+', 'N', 0),
        # WHERE foo IN ('880987','882618','708228','522330')
        (r' (IN|VALUES)\s*\([^,]+,[^)]+\)', ' \\1 (XYZ)', re.IGNORECASE),
        # repeated placeholders / serialized integers
        (r'(N,)+', 'N..N', 0),
        (r'(X,)+', 'X..X', 0),
        (r'(i:N;)+', 'N..N', 0),
        # keep nothing past the WHERE keyword
        (r'WHERE.*', 'WHERE', 0),
    )
    for pattern, replacement, flags in rewrites:
        sql = re.sub(pattern, replacement, sql, flags=flags)

    return sql.strip()