#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
arclamp-grep -- analyze Arc Lamp logs. This is a CLI tool for parsing trace
logs and printing a leaderboard of the functions which are most
frequently on-CPU.
usage: arclamp-grep [--resolution TIME] [--entrypoint NAME]
[--grep STRING] [--slice SLICE] [--count COUNT]
[--channel CHANNEL]
Options:
--resolution TIME Which log files to analyze. May be one of 'hourly',
'daily', or 'weekly'. (Default: 'daily').
--entrypoint NAME Analyze logs for this entry point. May be one of
'all', 'index', 'api', or 'load'). (Default: 'all').
--grep STRING Only include stacks which include this string
--count COUNT Show the top COUNT entries. (Default: 20).
--slice SLICE Slice of files to analyze, in Python slice notation.
Files are ordered from oldest to newest, so
'--slice=-2:' means the two most recent files.
--channel CHANNEL Which channel to look at. defaults to "xenon"
Copyright 2015 Ori Livneh <[email protected]>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY CODE, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import collections
import fnmatch
import glob
import gzip
import operator
import os.path
import re
import sys
import textwrap
# Stack frames which match any of these shell-style wildcard patterns
# are excluded from the leaderboard.
SKIP_PATTERNS = ('*BagOStuff*', '*Http::exec*', '*ObjectCache*', '/srv*',
                 'AutoLoader*', 'Curl*', 'Database*', 'Hooks*', 'Http::*',
                 'LoadBalancer*', 'Memcached*', 'wfGetDB*')
RESET = '\033[0m'
YELLOW = '\033[93m'
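# slicer() turns a slice string such as '-2:' into a function that applies
# the equivalent Python slice to a sequence. Illustrative examples of the
# behaviour implied by the regex below:
#   slicer('-2:')(files)  -> files[-2:]   (the two most recent files)
#   slicer('1:3')(files)  -> files[1:3]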
def slicer(spec):
    args = re.match('(-?[0-9]+)?(?::(-?[0-9]+))?', spec).groups()
    args = [int(arg) if arg is not None else arg for arg in args]
    return lambda seq: operator.getitem(seq, slice(*args))
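# A frame is skipped when it contains no uppercase characters (e.g. bare
# file paths or lowercase global functions) or when it matches one of the
# wildcard patterns in SKIP_PATTERNS above.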
def should_skip(f):
    return f.lower() == f or any(fnmatch.fnmatch(f, p) for p in SKIP_PATTERNS)
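# Each log line is a collapsed stack of the form 'func1;func2;funcN COUNT'.
# Branch-specific version strings such as '1.27wmf12' are normalized to
# 'X.XXwmfXX' so equivalent stacks from different deployment branches are
# counted together. Illustrative example:
#   parse_line('main;Foo::bar 7') == (['main', 'Foo::bar'], 7)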
def parse_line(line):
    line = re.sub(r'\d\.\d\dwmf\d+', 'X.XXwmfXX', line.rstrip())
    funcs, count = line.split(' ', 1)
    return funcs.split(';'), int(count)
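# grep() yields the lines of a log file that contain search_string,
# transparently decompressing .gz files. Note that the default --grep value
# is the empty string, which matches every line.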
def grep(fname, search_string):
if fname.endswith(".gz"):
opener = gzip.open
else:
opener = open
with opener(fname) as f:
for line in f:
if search_string in line:
yield line
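# iter_funcs() walks every matching line of the given files, trims skipped
# frames from the leaf end of each stack, and yields the remaining leaf
# function once per sample so that collections.Counter tallies on-CPU
# samples per function. It reads the --grep value from the module-level
# 'args' parsed below.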
def iter_funcs(files):
    for fname in files:
        for line in grep(fname, args.grep):
            funcs, count = parse_line(line)
            while funcs and should_skip(funcs[-1]):
                funcs.pop()
            if funcs:
                func = funcs.pop()
                for _ in range(count):
                    yield func
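# Handle -h/--help by printing the module docstring instead of the terse
# argparse help (argparse's own help is disabled via add_help=False below).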
if {'-h', '--help'}.intersection(sys.argv):
    sys.exit(textwrap.dedent(__doc__))
arg_parser = argparse.ArgumentParser(add_help=False)
arg_parser.add_argument(
    '--resolution',
    default='daily',
    choices=('hourly', 'daily', 'weekly'),
)
arg_parser.add_argument(
    '--count',
    default=20,
    type=int,
    help='show this many entries',
)
arg_parser.add_argument(
    '--entrypoint',
    choices=('all', 'index', 'api', 'load'),
    default='all',
)
arg_parser.add_argument(
    '--grep',
    default='',
    help='only include stacks which include this string',
)
arg_parser.add_argument(
    '--slice',
    default='-2:',
    help='slice of files to consider',
    type=slicer,
)
arg_parser.add_argument(
    '--channel',
    default='xenon',
    help='What channel to look at',
    choices=['xenon', 'excimer'],
)
args = arg_parser.parse_args()
# Legacy: the 'xenon' channel has a generic filename for now.
if args.channel == 'xenon':
    glob_pattern = '/srv/arclamp/logs/%(resolution)s/*.%(entrypoint)s.log*'
else:
    glob_pattern = '/srv/arclamp/logs/%(resolution)s/*.%(channel)s.%(entrypoint)s.log*'
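# For example (illustrative values), --channel=excimer --entrypoint=api
# --resolution=daily expands to '/srv/arclamp/logs/daily/*.excimer.api.log*'.
# The matches are then sorted oldest-to-newest by mtime, reduced with the
# --slice argument (default '-2:', the two newest files), and filtered down
# to plain '.log' or '.log.gz' files.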
file_names = glob.glob(glob_pattern % vars(args))
file_names.sort(key=os.path.getmtime)
file_names = args.slice(file_names)
file_names = [fn for fn in file_names if fn.endswith(".log.gz") or fn.endswith(".log")]
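# Tally one Counter entry per sample. 'total' is the number of samples kept
# after filtering and is the denominator for the percentage column; max_len
# sizes the function-name column of the leaderboard.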
counter = collections.Counter(iter_funcs(file_names))
total = sum(1 for _ in counter.elements())
max_len = max(len(f) for f, _ in counter.most_common(args.count))
desc = 'Top %d functions' % args.count
if args.grep:
    desc += ' in traces matching "%s"' % args.grep
if args.entrypoint == 'all':
    desc += ', all entry-points:'
else:
    desc += ', %s.php:' % args.entrypoint
print(desc)
print('-' * len(desc))
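# Each leaderboard row looks roughly like (illustrative, colour codes omitted):
#    1 | SomeClass::someMethod         | 12.34%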
for idx, (func, count) in enumerate(counter.most_common(args.count)):
    ordinal = idx + 1
    percent = 100.0 * count / total
    func = YELLOW + (('%% -%ds' % max_len) % func) + RESET
    print('% 4d | %s |% 5.2f%%' % (ordinal, func, percent))
print('-' * len(desc))
print('Log files:')
for f in file_names:
    print(' - %s' % f)
print('')