Skip to content

Commit

Permalink
Fix kubetoolsca#71: Correct krs health for multi-container pods
Browse files Browse the repository at this point in the history
  • Loading branch information
MinhPhamLapTrinh committed Oct 15, 2024
1 parent 749c88a commit 5cd825c
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 110 deletions.
61 changes: 39 additions & 22 deletions build/lib/krs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,23 +174,23 @@ def print_recommendations(self):

def health_check(self, change_model=False, device='cpu'):

if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
while True:
if continue_previous_chat not in ['y', 'n']:
continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
else:
break

if continue_previous_chat=='y':
krsllmclient = KrsGPTClient(device=device)
self.continue_chat = True
else:
krsllmclient = KrsGPTClient(reset_history=True, device=device)
# if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
# continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
# while True:
# if continue_previous_chat not in ['y', 'n']:
# continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
# else:
# break

# if continue_previous_chat=='y':
# krsllmclient = KrsGPTClient(device=device)
# self.continue_chat = True
# else:
# krsllmclient = KrsGPTClient(reset_history=True, device=device)

else:
krsllmclient = KrsGPTClient(reinitialize=True, device=device)
self.continue_chat = False
# else:
# krsllmclient = KrsGPTClient(reinitialize=True, device=device)
# self.continue_chat = False

if not self.continue_chat:

Expand Down Expand Up @@ -227,10 +227,8 @@ def health_check(self, change_model=False, device='cpu'):

print("\nExtracting logs and events from the pod...")

logs_from_pod = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)

self.logs_extracted = extract_log_entries(logs_from_pod)

self.logs_extracted = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)
print(self.logs_extracted)
print("\nLogs and events from the pod extracted successfully!\n")

prompt_to_llm = self.create_prompt(self.logs_extracted)
Expand All @@ -243,8 +241,27 @@ def get_logs_from_pod(self, namespace_index, pod_index):
try:
namespace_index -= 1
pod_index -= 1
namespace = list(self.list_namespaces())[namespace_index]
return list(self.pod_info[namespace][pod_index]['info']['Logs'].values())[0]
namespace = list(self.list_namespaces())[namespace_index]
formatted_logs = ""
container_status = {}
for container_state in self.pod_info[namespace][pod_index]['info']['PodInfo']['status']['container_statuses']:
container_status[container_state['name']] = container_state['state']['running']

for container_name, log in self.pod_info[namespace][pod_index]['info']['Logs'].items():
status = container_status.get(container_name)
extracted_logs = extract_log_entries(log, status)
if str(status) == "None":
formatted_logs += f"\n{container_name}\n"
formatted_logs += f"{extracted_logs}\n"
else:
filtered_log = {log_line for log_line in extracted_logs if "Error" in log_line or "Failed" in log_line}
if filtered_log:
formatted_logs += f"\n{container_name}\n"
for _, log_entry in enumerate(filtered_log, 1):
formatted_logs += f"{log_entry}\n"
else:
formatted_logs += f"\nContainer {container_name} has no ERRORS or FAILED logs string\n"
return formatted_logs
except KeyError as e:
print("\nKindly enter a value from the available namespaces and pods")
return None
Expand Down
73 changes: 40 additions & 33 deletions build/lib/krs/utils/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,44 +32,51 @@ def filter_similar_entries(log_entries):
filtered_entries = {entry for entry in unique_entries if entry not in to_remove}
return filtered_entries

def _json_field_text(value):
    """Return *value* as text, mapping a missing/None JSON field to ''."""
    return "" if value is None else str(value)


def _format_json_log_entry(payload, line):
    """Build a ``"<Level>: <message>"`` entry from a JSON log payload.

    Returns ``None`` when the payload carries neither an error/warning
    ``severity`` nor a ``level`` key. Raises ``json.JSONDecodeError`` when
    *payload* is not valid JSON.
    """
    log_json = json.loads(payload)
    if not isinstance(log_json, dict):
        return None

    # Compare case-insensitively so "error"/"ERROR"/"Error" are all labelled
    # as errors, and tolerate non-string field values.
    severity = _json_field_text(log_json.get('severity')).lower()
    if severity in ('error', 'warning'):
        level = "Error" if severity == 'error' else "Warning"
        # Without an explicit 'error' field the whole raw line is the message.
        message = _json_field_text(log_json['error']) if 'error' in log_json else line
        return f"{level}: {message.strip()}"

    if 'level' in log_json:
        # NOTE(review): every non-error level (including info/debug) is
        # labelled "Warning" here, as before -- confirm this is intended.
        is_error = _json_field_text(log_json['level']).lower() == 'error'
        level = "Error" if is_error else "Warning"
        message = _json_field_text(log_json.get('msg')) + _json_field_text(log_json.get('error'))
        return f"{level}: {message.strip()}"

    return None


def extract_log_entries(log_contents, container_state):
    """Collect log entries from raw container log text.

    Args:
        log_contents: Raw log text, one entry per line. ``None`` is treated
            as empty text.
        container_state: The container's running state as supplied by the
            caller. When it is ``None`` (or the string ``'None'``) every line
            is kept verbatim; otherwise only lines matching one of the known
            warning/error formats are kept, normalised to
            ``"Error: ..."`` / ``"Warning: ..."``.

    Returns:
        The result of ``filter_similar_entries`` applied to the set of
        collected entries.
    """
    lines = (log_contents or "").split('\n')

    # The state is the same for every line, so decide once up front.
    if container_state is None or container_state == 'None':
        return filter_similar_entries(set(lines))

    # Patterns to match different log formats. The '.' before the fractional
    # seconds is escaped so it only matches a literal dot.
    iso_pattern = re.compile(
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z\s+(warn|error)\s+\S+\s+(.*)',
        re.IGNORECASE,
    )
    # klog-style header: the leading letter is the severity (W or E), so it is
    # captured instead of assuming every such line is an error.
    klog_pattern = re.compile(r'([WE])\d{4} \d{2}:\d{2}:\d{2}\.\d+\s+\d+\s+(.*)')
    json_pattern = re.compile(r'({.*})')
    patterns = (iso_pattern, klog_pattern, json_pattern)

    log_entries = set()
    for line in lines:
        for pattern in patterns:
            match = pattern.search(line)
            if match is None:
                continue

            if pattern is iso_pattern:
                raw_level, message = match.groups()
                level = "Error" if raw_level.lower() == "error" else "Warning"
                log_entries.add(f"{level}: {message.strip()}")
                break  # Stop after the first match

            if pattern is klog_pattern:
                severity_letter, payload = match.groups()
                if not payload.startswith('{'):
                    level = "Error" if severity_letter == 'E' else "Warning"
                    log_entries.add(f"{level}: {payload.strip()}")
                    break  # Stop after the first match
                # Otherwise the message itself is JSON; handle it below.
            else:
                payload = match.group(1)

            try:
                entry = _format_json_log_entry(payload, line)
            except json.JSONDecodeError:
                continue  # Not valid JSON; let the next pattern try
            if entry is not None:
                log_entries.add(entry)
            break  # Stop after the first match

    return filter_similar_entries(log_entries)
61 changes: 39 additions & 22 deletions krs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,23 +174,23 @@ def print_recommendations(self):

def health_check(self, change_model=False, device='cpu'):

if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
while True:
if continue_previous_chat not in ['y', 'n']:
continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
else:
break

if continue_previous_chat=='y':
krsllmclient = KrsGPTClient(device=device)
self.continue_chat = True
else:
krsllmclient = KrsGPTClient(reset_history=True, device=device)
# if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
# continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
# while True:
# if continue_previous_chat not in ['y', 'n']:
# continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
# else:
# break

# if continue_previous_chat=='y':
# krsllmclient = KrsGPTClient(device=device)
# self.continue_chat = True
# else:
# krsllmclient = KrsGPTClient(reset_history=True, device=device)

else:
krsllmclient = KrsGPTClient(reinitialize=True, device=device)
self.continue_chat = False
# else:
# krsllmclient = KrsGPTClient(reinitialize=True, device=device)
# self.continue_chat = False

if not self.continue_chat:

Expand Down Expand Up @@ -227,10 +227,8 @@ def health_check(self, change_model=False, device='cpu'):

print("\nExtracting logs and events from the pod...")

logs_from_pod = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)

self.logs_extracted = extract_log_entries(logs_from_pod)

self.logs_extracted = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)

print("\nLogs and events from the pod extracted successfully!\n")

prompt_to_llm = self.create_prompt(self.logs_extracted)
Expand All @@ -243,8 +241,27 @@ def get_logs_from_pod(self, namespace_index, pod_index):
try:
namespace_index -= 1
pod_index -= 1
namespace = list(self.list_namespaces())[namespace_index]
return list(self.pod_info[namespace][pod_index]['info']['Logs'].values())[0]
namespace = list(self.list_namespaces())[namespace_index]
formatted_logs = ""
container_status = {}
for container_state in self.pod_info[namespace][pod_index]['info']['PodInfo']['status']['container_statuses']:
container_status[container_state['name']] = container_state['state']['running']

for container_name, log in self.pod_info[namespace][pod_index]['info']['Logs'].items():
status = container_status.get(container_name)
extracted_logs = extract_log_entries(log, status)
if str(status) == "None":
formatted_logs += f"\n{container_name}\n"
formatted_logs += f"{extracted_logs}\n"
else:
filtered_log = {log_line for log_line in extracted_logs if "Error" in log_line or "Failed" in log_line}
if filtered_log:
formatted_logs += f"\n{container_name}\n"
for _, log_entry in enumerate(filtered_log, 1):
formatted_logs += f"{log_entry}\n"
else:
formatted_logs += f"\nContainer {container_name} has no ERRORS or FAILED logs string\n"
return formatted_logs
except KeyError as e:
print("\nKindly enter a value from the available namespaces and pods")
return None
Expand Down
73 changes: 40 additions & 33 deletions krs/utils/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,44 +32,51 @@ def filter_similar_entries(log_entries):
filtered_entries = {entry for entry in unique_entries if entry not in to_remove}
return filtered_entries

def _json_field_text(value):
    """Return *value* as text, mapping a missing/None JSON field to ''."""
    return "" if value is None else str(value)


def _format_json_log_entry(payload, line):
    """Build a ``"<Level>: <message>"`` entry from a JSON log payload.

    Returns ``None`` when the payload carries neither an error/warning
    ``severity`` nor a ``level`` key. Raises ``json.JSONDecodeError`` when
    *payload* is not valid JSON.
    """
    log_json = json.loads(payload)
    if not isinstance(log_json, dict):
        return None

    # Compare case-insensitively so "error"/"ERROR"/"Error" are all labelled
    # as errors, and tolerate non-string field values.
    severity = _json_field_text(log_json.get('severity')).lower()
    if severity in ('error', 'warning'):
        level = "Error" if severity == 'error' else "Warning"
        # Without an explicit 'error' field the whole raw line is the message.
        message = _json_field_text(log_json['error']) if 'error' in log_json else line
        return f"{level}: {message.strip()}"

    if 'level' in log_json:
        # NOTE(review): every non-error level (including info/debug) is
        # labelled "Warning" here, as before -- confirm this is intended.
        is_error = _json_field_text(log_json['level']).lower() == 'error'
        level = "Error" if is_error else "Warning"
        message = _json_field_text(log_json.get('msg')) + _json_field_text(log_json.get('error'))
        return f"{level}: {message.strip()}"

    return None


def extract_log_entries(log_contents, container_state):
    """Collect log entries from raw container log text.

    Args:
        log_contents: Raw log text, one entry per line. ``None`` is treated
            as empty text.
        container_state: The container's running state as supplied by the
            caller. When it is ``None`` (or the string ``'None'``) every line
            is kept verbatim; otherwise only lines matching one of the known
            warning/error formats are kept, normalised to
            ``"Error: ..."`` / ``"Warning: ..."``.

    Returns:
        The result of ``filter_similar_entries`` applied to the set of
        collected entries.
    """
    lines = (log_contents or "").split('\n')

    # The state is the same for every line, so decide once up front.
    if container_state is None or container_state == 'None':
        return filter_similar_entries(set(lines))

    # Patterns to match different log formats. The '.' before the fractional
    # seconds is escaped so it only matches a literal dot.
    iso_pattern = re.compile(
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z\s+(warn|error)\s+\S+\s+(.*)',
        re.IGNORECASE,
    )
    # klog-style header: the leading letter is the severity (W or E), so it is
    # captured instead of assuming every such line is an error.
    klog_pattern = re.compile(r'([WE])\d{4} \d{2}:\d{2}:\d{2}\.\d+\s+\d+\s+(.*)')
    json_pattern = re.compile(r'({.*})')
    patterns = (iso_pattern, klog_pattern, json_pattern)

    log_entries = set()
    for line in lines:
        for pattern in patterns:
            match = pattern.search(line)
            if match is None:
                continue

            if pattern is iso_pattern:
                raw_level, message = match.groups()
                level = "Error" if raw_level.lower() == "error" else "Warning"
                log_entries.add(f"{level}: {message.strip()}")
                break  # Stop after the first match

            if pattern is klog_pattern:
                severity_letter, payload = match.groups()
                if not payload.startswith('{'):
                    level = "Error" if severity_letter == 'E' else "Warning"
                    log_entries.add(f"{level}: {payload.strip()}")
                    break  # Stop after the first match
                # Otherwise the message itself is JSON; handle it below.
            else:
                payload = match.group(1)

            try:
                entry = _format_json_log_entry(payload, line)
            except json.JSONDecodeError:
                continue  # Not valid JSON; let the next pattern try
            if entry is not None:
                log_entries.add(entry)
            break  # Stop after the first match

    return filter_similar_entries(log_entries)

0 comments on commit 5cd825c

Please sign in to comment.