Fix kubetoolsca#71: Correct krs health for multi-container pods

MinhPhamLapTrinh · Oct 15, 2024 · 5cd825c
1 parent 749c88a
commit 5cd825c
Show file tree

Hide file tree

Showing 4 changed files with 158 additions and 110 deletions.
diff --git a/build/lib/krs/main.py b/build/lib/krs/main.py
@@ -174,23 +174,23 @@ def print_recommendations(self):
 
     def health_check(self, change_model=False, device='cpu'):
 
-        if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
-            continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
-            while True:
-                if continue_previous_chat not in ['y', 'n']:
-                    continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
-                else:
-                    break
-
-            if continue_previous_chat=='y':
-                krsllmclient = KrsGPTClient(device=device)
-                self.continue_chat = True
-            else:
-                krsllmclient = KrsGPTClient(reset_history=True, device=device)
+        # if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
+        #     continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
+        #     while True:
+        #         if continue_previous_chat not in ['y', 'n']:
+        #             continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
+        #         else:
+        #             break
+
+        #     if continue_previous_chat=='y':
+        #         krsllmclient = KrsGPTClient(device=device)
+        #         self.continue_chat = True
+        #     else:
+        #         krsllmclient = KrsGPTClient(reset_history=True, device=device)
 
-        else:
-            krsllmclient = KrsGPTClient(reinitialize=True, device=device)
-            self.continue_chat = False
+        # else:
+        #     krsllmclient = KrsGPTClient(reinitialize=True, device=device)
+        #     self.continue_chat = False
 
         if not self.continue_chat:
 
@@ -227,10 +227,8 @@ def health_check(self, change_model=False, device='cpu'):
 
             print("\nExtracting logs and events from the pod...")
 
-            logs_from_pod = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)
-
-            self.logs_extracted = extract_log_entries(logs_from_pod)
-
+            self.logs_extracted = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index) 
+            print(self.logs_extracted)                                                         
             print("\nLogs and events from the pod extracted successfully!\n")
 
         prompt_to_llm = self.create_prompt(self.logs_extracted)
@@ -243,8 +241,27 @@ def get_logs_from_pod(self, namespace_index, pod_index):
         try:
             namespace_index -= 1
             pod_index -= 1
-            namespace = list(self.list_namespaces())[namespace_index]
-            return list(self.pod_info[namespace][pod_index]['info']['Logs'].values())[0]
+            namespace = list(self.list_namespaces())[namespace_index]             
+            formatted_logs = ""                 
+            container_status = {}
+            for container_state in self.pod_info[namespace][pod_index]['info']['PodInfo']['status']['container_statuses']:                           
+                container_status[container_state['name']] = container_state['state']['running']
+
+            for container_name, log in self.pod_info[namespace][pod_index]['info']['Logs'].items():            
+                status = container_status.get(container_name)                                           
+                extracted_logs = extract_log_entries(log, status) 
+                if str(status) == "None": 
+                    formatted_logs += f"\n{container_name}\n"                    
+                    formatted_logs += f"{extracted_logs}\n"
+                else:
+                    filtered_log = {log_line for log_line in extracted_logs if "Error" in log_line or "Failed" in log_line}                            
+                    if filtered_log:
+                        formatted_logs += f"\n{container_name}\n"
+                        for _, log_entry in enumerate(filtered_log, 1):
+                            formatted_logs += f"{log_entry}\n"                    
+                    else:
+                        formatted_logs += f"\nContainer {container_name} has no ERRORS or FAILED logs string\n"                                           
+            return formatted_logs
         except KeyError as e:
             print("\nKindly enter a value from the available namespaces and pods")
             return None

diff --git a/build/lib/krs/utils/functional.py b/build/lib/krs/utils/functional.py
@@ -32,44 +32,51 @@ def filter_similar_entries(log_entries):
     filtered_entries = {entry for entry in unique_entries if entry not in to_remove}
     return filtered_entries
 
-def extract_log_entries(log_contents):
+def extract_log_entries(log_contents, container_state):
     # Patterns to match different log formats
     patterns = [
         re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6}Z\s+(warn|error)\s+\S+\s+(.*)', re.IGNORECASE),
-        re.compile(r'[WE]\d{4} \d{2}:\d{2}:\d{2}.\d+\s+\d+\s+(.*)'),
-        re.compile(r'({.*})')  
+        re.compile(r'[WE]\d{4} \d{2}:\d{2}:\d{2}.\d+\s+\d+\s+(.*)'),                
+        re.compile(r'({.*})')
     ]
 
     log_entries = set()
-    # Attempt to match each line with all patterns
-    for line in log_contents.split('\n'):
-        for pattern in patterns:
-            match = pattern.search(line)
-            if match:
-                if match.groups()[0].startswith('{'):
-                    # Handle JSON formatted log entries
-                    try:
-                        log_json = json.loads(match.group(1))
-                        if 'severity' in log_json and log_json['severity'].lower() in ['error', 'warning']:
-                            level = "Error" if log_json['severity'] == "ERROR" else "Warning"
-                            message = log_json.get('error', '') if 'error' in log_json.keys() else line
-                            log_entries.add(f"{level}: {message.strip()}")
-                        elif 'level' in log_json:
-                            level = "Error" if log_json['level'] == "error" else "Warning"
-                            message = log_json.get('msg', '')  + log_json.get('error', '')
-                            log_entries.add(f"{level}: {message.strip()}")
-                    except json.JSONDecodeError:
-                        continue  # Skip if JSON is not valid
-                else:
-                    if len(match.groups()) == 2:
-                        level, message = match.groups()
-                    elif len(match.groups()) == 1:
-                        message = match.group(1)  # Assuming error as default
-                        level = "ERROR"  # Default if not specified in the log
+    # Attempt to match each line with all patterns    
+    for line in log_contents.split('\n'):             
+        if str(container_state) == 'None':
+            log_entries.add(line)
+        else:
+            for pattern in patterns:
+                match = pattern.search(line)
+                if match:                  
+                    if match.groups()[0].startswith('{'):                
+                        # Handle JSON formatted log entries
+                        try:
+                            log_json = json.loads(match.group(1))
+                            # print(log_json)
+                            if 'severity' in log_json and log_json['severity'].lower() in ['error', 'warning']:
+                                level = "Error" if log_json['severity'] == "ERROR" else "Warning"
+                                message = log_json.get('error', '') if 'error' in log_json.keys() else line
+                                log_entries.add(f"{level}: {message.strip()}")
+                            elif 'level' in log_json:
+                                level = "Error" if log_json['level'] == "error" else "Warning"
+                                message = log_json.get('msg', '')  + log_json.get('error', '')
+                                log_entries.add(f"{level}: {message.strip()}")
+                        except json.JSONDecodeError:
+                            continue  # Skip if JSON is not valid
+                    else:
+                        if len(match.groups()) == 2:
+                            level, message = match.groups()
+                        elif len(match.groups()) == 1:                
+                            message = match.group(1)  # Assuming error as default       
+
+                            level = "ERROR"  # Default if not specified in the log
+
+                        level = "Error" if "error" in level.lower() else "Warning"
+                        formatted_message = f"{level}: {message.strip()}"
+                        log_entries.add(formatted_message)
+                    break  # Stop after the first match
+    return filter_similar_entries(log_entries)
+
 
-                    level = "Error" if "error" in level.lower() else "Warning"
-                    formatted_message = f"{level}: {message.strip()}"
-                    log_entries.add(formatted_message)
-                break  # Stop after the first match
 
-    return filter_similar_entries(log_entries)
diff --git a/krs/main.py b/krs/main.py
@@ -174,23 +174,23 @@ def print_recommendations(self):
 
     def health_check(self, change_model=False, device='cpu'):
 
-        if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
-            continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
-            while True:
-                if continue_previous_chat not in ['y', 'n']:
-                    continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
-                else:
-                    break
-
-            if continue_previous_chat=='y':
-                krsllmclient = KrsGPTClient(device=device)
-                self.continue_chat = True
-            else:
-                krsllmclient = KrsGPTClient(reset_history=True, device=device)
+        # if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
+        #     continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
+        #     while True:
+        #         if continue_previous_chat not in ['y', 'n']:
+        #             continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
+        #         else:
+        #             break
+
+        #     if continue_previous_chat=='y':
+        #         krsllmclient = KrsGPTClient(device=device)
+        #         self.continue_chat = True
+        #     else:
+        #         krsllmclient = KrsGPTClient(reset_history=True, device=device)
 
-        else:
-            krsllmclient = KrsGPTClient(reinitialize=True, device=device)
-            self.continue_chat = False
+        # else:
+        #     krsllmclient = KrsGPTClient(reinitialize=True, device=device)
+        #     self.continue_chat = False
 
         if not self.continue_chat:
 
@@ -227,10 +227,8 @@ def health_check(self, change_model=False, device='cpu'):
 
             print("\nExtracting logs and events from the pod...")
 
-            logs_from_pod = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)
-
-            self.logs_extracted = extract_log_entries(logs_from_pod)
-
+            self.logs_extracted = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index) 
+
             print("\nLogs and events from the pod extracted successfully!\n")
 
         prompt_to_llm = self.create_prompt(self.logs_extracted)
@@ -243,8 +241,27 @@ def get_logs_from_pod(self, namespace_index, pod_index):
         try:
             namespace_index -= 1
             pod_index -= 1
-            namespace = list(self.list_namespaces())[namespace_index]
-            return list(self.pod_info[namespace][pod_index]['info']['Logs'].values())[0]
+            namespace = list(self.list_namespaces())[namespace_index]             
+            formatted_logs = ""                 
+            container_status = {}
+            for container_state in self.pod_info[namespace][pod_index]['info']['PodInfo']['status']['container_statuses']:                           
+                container_status[container_state['name']] = container_state['state']['running']
+
+            for container_name, log in self.pod_info[namespace][pod_index]['info']['Logs'].items():            
+                status = container_status.get(container_name)                                           
+                extracted_logs = extract_log_entries(log, status) 
+                if str(status) == "None": 
+                    formatted_logs += f"\n{container_name}\n"                    
+                    formatted_logs += f"{extracted_logs}\n"
+                else:
+                    filtered_log = {log_line for log_line in extracted_logs if "Error" in log_line or "Failed" in log_line}                            
+                    if filtered_log:
+                        formatted_logs += f"\n{container_name}\n"
+                        for _, log_entry in enumerate(filtered_log, 1):
+                            formatted_logs += f"{log_entry}\n"                    
+                    else:
+                        formatted_logs += f"\nContainer {container_name} has no ERRORS or FAILED logs string\n"                                           
+            return formatted_logs
         except KeyError as e:
             print("\nKindly enter a value from the available namespaces and pods")
             return None

diff --git a/krs/utils/functional.py b/krs/utils/functional.py
@@ -32,44 +32,51 @@ def filter_similar_entries(log_entries):
     filtered_entries = {entry for entry in unique_entries if entry not in to_remove}
     return filtered_entries
 
-def extract_log_entries(log_contents):
+def extract_log_entries(log_contents, container_state):
     # Patterns to match different log formats
     patterns = [
         re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6}Z\s+(warn|error)\s+\S+\s+(.*)', re.IGNORECASE),
-        re.compile(r'[WE]\d{4} \d{2}:\d{2}:\d{2}.\d+\s+\d+\s+(.*)'),
-        re.compile(r'({.*})')  
+        re.compile(r'[WE]\d{4} \d{2}:\d{2}:\d{2}.\d+\s+\d+\s+(.*)'),                
+        re.compile(r'({.*})')
     ]
 
     log_entries = set()
-    # Attempt to match each line with all patterns
-    for line in log_contents.split('\n'):
-        for pattern in patterns:
-            match = pattern.search(line)
-            if match:
-                if match.groups()[0].startswith('{'):
-                    # Handle JSON formatted log entries
-                    try:
-                        log_json = json.loads(match.group(1))
-                        if 'severity' in log_json and log_json['severity'].lower() in ['error', 'warning']:
-                            level = "Error" if log_json['severity'] == "ERROR" else "Warning"
-                            message = log_json.get('error', '') if 'error' in log_json.keys() else line
-                            log_entries.add(f"{level}: {message.strip()}")
-                        elif 'level' in log_json:
-                            level = "Error" if log_json['level'] == "error" else "Warning"
-                            message = log_json.get('msg', '')  + log_json.get('error', '')
-                            log_entries.add(f"{level}: {message.strip()}")
-                    except json.JSONDecodeError:
-                        continue  # Skip if JSON is not valid
-                else:
-                    if len(match.groups()) == 2:
-                        level, message = match.groups()
-                    elif len(match.groups()) == 1:
-                        message = match.group(1)  # Assuming error as default
-                        level = "ERROR"  # Default if not specified in the log
+    # Attempt to match each line with all patterns    
+    for line in log_contents.split('\n'):             
+        if str(container_state) == 'None':
+            log_entries.add(line)
+        else:
+            for pattern in patterns:
+                match = pattern.search(line)
+                if match:                  
+                    if match.groups()[0].startswith('{'):                
+                        # Handle JSON formatted log entries
+                        try:
+                            log_json = json.loads(match.group(1))
+                            # print(log_json)
+                            if 'severity' in log_json and log_json['severity'].lower() in ['error', 'warning']:
+                                level = "Error" if log_json['severity'] == "ERROR" else "Warning"
+                                message = log_json.get('error', '') if 'error' in log_json.keys() else line
+                                log_entries.add(f"{level}: {message.strip()}")
+                            elif 'level' in log_json:
+                                level = "Error" if log_json['level'] == "error" else "Warning"
+                                message = log_json.get('msg', '')  + log_json.get('error', '')
+                                log_entries.add(f"{level}: {message.strip()}")
+                        except json.JSONDecodeError:
+                            continue  # Skip if JSON is not valid
+                    else:
+                        if len(match.groups()) == 2:
+                            level, message = match.groups()
+                        elif len(match.groups()) == 1:                
+                            message = match.group(1)  # Assuming error as default       
+
+                            level = "ERROR"  # Default if not specified in the log
+
+                        level = "Error" if "error" in level.lower() else "Warning"
+                        formatted_message = f"{level}: {message.strip()}"
+                        log_entries.add(formatted_message)
+                    break  # Stop after the first match
+    return filter_similar_entries(log_entries)
+
 
-                    level = "Error" if "error" in level.lower() else "Warning"
-                    formatted_message = f"{level}: {message.strip()}"
-                    log_entries.add(formatted_message)
-                break  # Stop after the first match
 
-    return filter_similar_entries(log_entries)