-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathhanging_threads.py
222 lines (174 loc) · 6.33 KB
/
hanging_threads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/python
"""
Copy this code and do
import hanging_threads
If a thread is at the same place for SECONDS_FROZEN then the
stacktrace is printed.
When example.py is run, the output is the following:
Starting the deadlocks monitoring
Sleep 3 seconds in custom func
---------- Thread 140536184002304 hangs ----------
File "example.py", line 12, in <module>
sleep(3)
File "example.py", line 6, in sleep
time.sleep(t)
Sleep 3 seconds
---------- Thread 140536184002304 awaked ----------
---------- Thread 140536184002304 hangs ----------
File "example.py", line 14, in <module>
time.sleep(3)
Sleep 3 seconds
---------- Thread 140536184002304 awaked ----------
---------- Thread 140536184002304 hangs ----------
File "example.py", line 16, in <module>
time.sleep(3)
Stopping the deadlocks monitoring
Sleep 3 seconds
Sleep 3 seconds
Exiting
"""
import sys
import threading
import linecache
import time
__version__ = "2.0.7"
__author__ = "Nicco Kunzmann"
# Default for the ``seconds_frozen`` parameter of start_monitoring(): how
# long a thread's stack has to stay unchanged before it is reported.
SECONDS_FROZEN = 10 # seconds
# Default for the ``test_interval`` parameter of start_monitoring(): how
# long the monitoring thread sleeps between two checks.
TEST_INTERVAL = 100 # milliseconds
def start_monitoring(seconds_frozen=SECONDS_FROZEN,
                     test_interval=TEST_INTERVAL):
    """Launch the background thread that watches for hanging threads.

    seconds_frozen - number of seconds a thread's stack has to stay
                     unchanged before its stack trace is printed
                     (defaults to SECONDS_FROZEN)
    test_interval  - sleep time of the monitoring thread between two
                     checks, in milliseconds (defaults to TEST_INTERVAL)

    Returns the already started StoppableThread; call its ``stop()``
    method to end the monitoring.
    """
    monitor_args = (seconds_frozen, test_interval)
    monitor_thread = StoppableThread(target=monitor, args=monitor_args)
    # Daemon thread: the monitor must not keep the interpreter alive.
    monitor_thread.daemon = True
    monitor_thread.start()
    return monitor_thread
class StoppableThread(threading.Thread):
    """Thread class with a stop() method.

    Stopping is cooperative: stop() only records the request, and the
    code running in the thread has to check is_stopped() regularly and
    return on its own.
    """

    def __init__(self, *args, **kwargs):
        """Create the thread; all arguments go to threading.Thread."""
        super(StoppableThread, self).__init__(*args, **kwargs)
        # A dedicated Event instead of a ``_stopped`` attribute: that name
        # was used internally by threading.Thread on some Python 3
        # versions, and overwriting it corrupts is_alive()/join() there.
        self._stop_event = threading.Event()

    def stop(self):
        """Request the thread to stop. May be called from any thread."""
        self._stop_event.set()

    def is_stopped(self):
        """Return True once stop() has been called, otherwise False."""
        return self._stop_event.is_set()
def monitor(seconds_frozen, test_interval):
    """Monitoring thread function.

    Checks if thread is hanging for time defined by
    ``seconds_frozen`` parameter every ``test_interval`` milliseconds.

    A thread counts as hanging when its formatted stack (see
    get_current_frames()) is equal on consecutive checks for more than
    ``seconds_frozen`` seconds.  Hanging, awaked and died threads are
    reported through the ``log_*`` functions.

    Has to run inside a thread object offering ``is_stopped()`` (such as
    StoppableThread); the loop ends once that method returns true.
    """
    # Measure elapsed time with a monotonic clock where the interpreter
    # has one, so adjusting the system clock neither fakes nor hides a hang.
    clock = getattr(time, 'monotonic', time.time)
    current_thread = threading.current_thread()
    hanging_threads = set()
    old_threads = {}  # Threads found on previous iteration.
    while not current_thread.is_stopped():
        new_threads = get_current_frames()

        # Report died threads.
        for thread_id, thread_data in old_threads.items():
            if thread_id not in new_threads and thread_id in hanging_threads:
                # Forget the id: thread identifiers may be recycled, and a
                # stale entry would get a new thread that reuses the id
                # reported as "awaked" although it never hung.
                hanging_threads.discard(thread_id)
                log_died_thread(thread_data)

        # Process live threads.
        time.sleep(test_interval / 1000.)
        now = clock()
        then = now - seconds_frozen
        for thread_id, thread_data in new_threads.items():
            # Don't report the monitor thread.
            if thread_id == current_thread.ident:
                continue
            frame = thread_data['frame']
            # If thread is new or its stack is changed then update time.
            if (thread_id not in old_threads or
                    frame != old_threads[thread_id]['frame']):
                thread_data['time'] = now
                # If the thread was hanging then report awaked thread.
                if thread_id in hanging_threads:
                    hanging_threads.remove(thread_id)
                    log_awaked_thread(thread_data)
            else:
                # If stack is not changed then keep old time.
                last_change_time = old_threads[thread_id]['time']
                thread_data['time'] = last_change_time
                # Check if this is a new hanging thread.
                if (thread_id not in hanging_threads and
                        last_change_time < then):
                    # Gotcha!
                    hanging_threads.add(thread_id)
                    # Report the hanged thread.
                    log_hanged_thread(thread_data, frame)
        old_threads = new_threads
def get_current_frames():
    """Take a snapshot of the running threads and their stacks.

    Returns a dict that maps thread id to a dict with the keys
    'frame' (the stack as produced by thread2list()), 'time' (always
    None here), 'id', 'name' and 'object' (the threading.Thread).
    Thread ids reported by sys._current_frames() that have no entry in
    threading.enumerate() are left out.
    """
    known_threads = {}
    for thread_object in threading.enumerate():
        known_threads[thread_object.ident] = thread_object

    snapshot = {}
    for ident, top_frame in sys._current_frames().items():
        owner = known_threads.get(ident)
        if owner is None:
            continue
        snapshot[ident] = {
            'frame': thread2list(top_frame),
            'time': None,
            'id': ident,
            'name': owner.name,
            'object': owner,
        }
    return snapshot
def frame2string(frame):
    """Return info about frame.

    Keyword arg:
        frame

    Return string in format:

        File {file name}, line {line number}, in
        {name of parent of code object} {newline}
        Line from file at line number {newline}

    The second line is left out when no source text can be found for
    the frame.  The result always ends with a newline, so the strings of
    several frames can be joined directly.
    """
    lineno = frame.f_lineno  # can be None when no line info is available
    co = frame.f_code
    filename = co.co_filename
    name = co.co_name
    s = '\tFile "{0}", line {1}, in {2}\n'.format(filename, lineno, name)
    line = ''
    # linecache compares the line number with integers, so None must not
    # be passed on to it.
    if lineno is not None:
        try:
            line = linecache.getline(
                filename, lineno, frame.f_globals).lstrip()
        except Exception as e:
            # Best effort: a source lookup problem must never break the
            # monitoring.  Report it on stderr like the rest of the log.
            line = ''
            sys.stderr.write('{0}\n'.format(e))
    if not line:
        return s
    if not line.endswith('\n'):
        line += '\n'
    return s + '\t\t' + line
def thread2list(frame):
    """Format the whole call stack that ends in ``frame``.

    Follows the ``f_back`` links starting at ``frame`` and returns a list
    with one frame2string() result per frame, outermost caller first and
    ``frame`` itself last.  A false ``frame`` gives an empty list.
    """
    stack = []
    current = frame
    while current:
        stack.append(frame2string(current))
        current = current.f_back
    # Collected innermost first; callers expect the outermost frame first.
    stack.reverse()
    return stack
def threadcaption(thread_data):
    """Return the caption 'Thread {id} "{name}"' for a thread.

    ``thread_data`` has to provide the keys 'id' and 'name'.
    """
    template = 'Thread {id} "{name}"'
    return template.format(id=thread_data['id'], name=thread_data['name'])
def log_hanged_thread(thread_data, frame):
    """Report a thread that is now considered hanging.

    Writes a "hangs" title line for the thread followed by its stack
    trace.  ``frame`` is the sequence of formatted frame strings; they
    are concatenated without a separator.
    """
    caption = threadcaption(thread_data)
    stack_trace = ''.join(frame)
    write_log('{0} hangs '.format(caption), stack_trace)
def log_awaked_thread(thread_data):
    """Report that a thread considered hanging is moving again.

    Writes a single "awaked" title line for the thread.
    """
    caption = threadcaption(thread_data)
    write_log('{0} awaked'.format(caption))
def log_died_thread(thread_data):
    """Report that a thread considered hanging no longer exists.

    Writes a single "died" title line for the thread.
    """
    caption = threadcaption(thread_data)
    write_log('{0} died  '.format(caption))
def write_log(title, message=''):
    """Write formatted log message to stderr.

    The title is centered in a field of 40 characters, which is then
    centered in a 60 character wide line of dashes.  A newline and
    ``message`` (unchanged) follow.
    """
    padded_title = title.center(40)
    banner = padded_title.center(60, '-')
    sys.stderr.write(banner + '\n' + message)