Skip to content

Commit

Permalink
Add json output option for recommender (#511)
Browse files Browse the repository at this point in the history
  • Loading branch information
jtydlack committed Feb 8, 2024
1 parent fa59834 commit c2896d0
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 17 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ __pycache__/*
*.out
kube-burner*
kube_burner*
recommender_*.json

# Project files
.ropeproject
Expand Down
1 change: 1 addition & 0 deletions config/recommender_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ auth_token: <Auth_Token>
scrape_duration: 10m
chaos_library: "kraken"
log_level: INFO
JSON_output: False

# for output purpose only do not change if not needed
chaos_tests:
Expand Down
56 changes: 51 additions & 5 deletions kraken/chaos_recommender/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

KRAKEN_TESTS_PATH = "./kraken_chaos_tests.txt"

#Placeholder, this should be done with topology

# Placeholder, this should be done with topology
def return_critical_services():
    """Return the hard-coded list of services considered critical."""
    critical = ["web", "cart"]
    return critical

Expand All @@ -19,6 +20,7 @@ def load_telemetry_data(file_path):
data = pd.read_csv(file_path, delimiter=r"\s+")
return data


def calculate_zscores(data):
zscores = pd.DataFrame()
zscores["Service"] = data["service"]
Expand All @@ -27,6 +29,7 @@ def calculate_zscores(data):
zscores["Network"] = (data["NETWORK"] - data["NETWORK"].mean()) / data["NETWORK"].std()
return zscores


def identify_outliers(data):
outliers_cpu = data[data["CPU"] > threshold]["Service"].tolist()
outliers_memory = data[data["Memory"] > threshold]["Service"].tolist()
Expand All @@ -47,7 +50,7 @@ def get_services_above_heatmap_threshold(dataframe, cpu_threshold, mem_threshold
return cpu_services, mem_services


def analysis(file_path, chaos_tests_config):
def analysis(file_path, chaos_tests_config, json_output):
# Load the telemetry data from file
data = load_telemetry_data(file_path)

Expand All @@ -63,28 +66,71 @@ def analysis(file_path, chaos_tests_config):
logging.info(f"CPU outliers: {outliers_cpu}")
logging.info(f"Memory outliers: {outliers_memory}")
logging.info(f"Network outliers: {outliers_network}")

logging.info("===================== HeatMap Analysis ==============================")

if cpu_services:
logging.info("Services with CPU_HEATMAP above threshold:", cpu_services)
else:
logging.info("There are no services that are using siginificant CPU compared to their assigned limits (infinite in case no limits are set).")
logging.info("There are no services that are using significant CPU compared to their assigned limits (infinite in case no limits are set).")
if mem_services:
logging.info("Services with MEM_HEATMAP above threshold:", mem_services)
else:
logging.info("There are no services that are using siginificant MEMORY compared to their assigned limits (infinite in case no limits are set).")
logging.info("There are no services that are using significant MEMORY compared to their assigned limits (infinite in case no limits are set).")
time.sleep(2)

logging.info("======================= Recommendations =============================")

if cpu_services:
logging.info(f"Recommended tests for {str(cpu_services)} :\n {chaos_tests_config['CPU']}")
logging.info("\n")

if mem_services:
logging.info(f"Recommended tests for {str(mem_services)} :\n {chaos_tests_config['MEM']}")
logging.info("\n")

if outliers_network:
logging.info(f"Recommended tests for str(outliers_network) :\n {chaos_tests_config['NETWORK']}")
logging.info(f"Recommended tests for {str(outliers_network)} :\n {chaos_tests_config['NETWORK']}")
logging.info("\n")

logging.info("\n")
logging.info("Please check data in utilisation.txt for further analysis")

if json_output is True:
analysis_data = json_struct(outliers_cpu, outliers_memory, outliers_network, cpu_services, mem_services, chaos_tests_config)
return analysis_data


def json_struct(outliers_cpu, outliers_memory, outliers_network, cpu_services,
                mem_services, chaos_tests_config):
    """Bundle the analysis results for JSON serialisation.

    Returns a three-element list: outlier profiling, heatmap analysis, and
    chaos-test recommendations. A recommendation entry is added only for
    categories that actually have affected services/outliers.
    """
    profiling = {
        "cpu_outliers": outliers_cpu,
        "memory_outliers": outliers_memory,
        "network_outliers": outliers_network
    }

    heatmap = {
        "services_with_cpu_heatmap_above_threshold": cpu_services,
        "services_with_mem_heatmap_above_threshold": mem_services
    }

    recommendations = {}

    if cpu_services:
        recommendations["cpu_services_recommendations"] = {
            "services": cpu_services,
            "tests": chaos_tests_config['CPU']
        }

    if mem_services:
        recommendations["mem_services_recommendations"] = {
            "services": mem_services,
            "tests": chaos_tests_config['MEM']
        }

    if outliers_network:
        recommendations["outliers_network_recommendations"] = {
            "outliers_networks": outliers_network,
            "tests": chaos_tests_config['NETWORK']
        }

    return [profiling, heatmap, recommendations]
7 changes: 3 additions & 4 deletions kraken/chaos_recommender/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def save_utilization_to_file(cpu_data, cpu_limits_result, mem_data, mem_limits_r

merged_df.to_csv(filename, sep='\t', index=False)


def fetch_utilization_from_prometheus(prometheus_endpoint, auth_token, namespace, scrape_duration):
urllib3.disable_warnings()
prometheus = PrometheusConnect(url=prometheus_endpoint, headers={'Authorization':'Bearer {}'.format(auth_token)}, disable_ssl=True)
Expand Down Expand Up @@ -89,8 +90,6 @@ def fetch_utilization_from_prometheus(prometheus_endpoint, auth_token, namespace
logging.info(network_query)
network_data = network_result


queries = [cpu_query, cpu_limits_query, mem_query, mem_limits_query]
save_utilization_to_file(cpu_data, cpu_limits_result, mem_data, mem_limits_result, network_data, saved_metrics_path)
return saved_metrics_path


return saved_metrics_path, queries
1 change: 1 addition & 0 deletions utils/chaos_recommender/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ You can also provide the input values through command-line arguments launching t
Chaos library
-L LOG_LEVEL, --log-level LOG_LEVEL
                        log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
-J, --json-output Makes JSON output
-M MEM [MEM ...], --MEM MEM [MEM ...]
Memory related chaos tests (space separated list)
-C CPU [CPU ...], --CPU CPU [CPU ...]
Expand Down
62 changes: 54 additions & 8 deletions utils/chaos_recommender/chaos_recommender.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import argparse
import json
import logging
import os.path
import sys
import time

import yaml

# kraken module import for running the recommender
# both from the root directory and the recommender
# folder
Expand All @@ -14,7 +18,6 @@
from kubernetes import config as kube_config



def parse_arguments(parser):

# command line options
Expand All @@ -27,6 +30,9 @@ def parse_arguments(parser):
parser.add_argument("-s", "--scrape-duration", action="store", default="10m", help="Prometheus scrape duration")
parser.add_argument("-L", "--log-level", action="store", default="INFO", help="log level (DEBUG, INFO, WARNING, ERROR, CRITICAL")

parser.add_argument("-J", "--json-output", action="store_true",
help="Makes json output")

parser.add_argument("-M", "--MEM", nargs='+', action="store", default=[],
help="Memory related chaos tests (space separated list)")
parser.add_argument("-C", "--CPU", nargs='+', action="store", default=[],
Expand All @@ -36,9 +42,9 @@ def parse_arguments(parser):
parser.add_argument("-G", "--GENERIC", nargs='+', action="store", default=[],
help="Memory related chaos tests (space separated list)")


return parser.parse_args()


def read_configuration(config_file_path):
if not os.path.exists(config_file_path):
logging.error(f"Config file not found: {config_file_path}")
Expand All @@ -54,16 +60,47 @@ def read_configuration(config_file_path):
prometheus_endpoint = config.get("prometheus_endpoint", "")
auth_token = config.get("auth_token", "")
scrape_duration = config.get("scrape_duration", "10m")
chaos_tests = config.get("chaos_tests" , {})
chaos_tests = config.get("chaos_tests", {})
json_output = config.get("JSON_output", False)
return (namespace, kubeconfig, prometheus_endpoint, auth_token, scrape_duration,
chaos_tests, log_level)
chaos_tests, log_level, json_output)


def prompt_input(prompt, default_value):
    """Ask the user for a value, falling back to *default_value* on blank input."""
    answer = input(f"{prompt} [{default_value}]: ")
    # A whitespace-only answer counts as "no answer"; the raw (unstripped)
    # input is returned otherwise, matching the original behaviour.
    return answer if answer.strip() else default_value


def make_json_output(file, namespace, kubeconfig, prometheus_endpoint, chaos_tests, scrape_duration, queries, analysis_data):
    """Write a recommender run (inputs, queries, analysis results) to a JSON file.

    Parameters:
        file: output file name; written inside
            ``utils/chaos_recommender/recommender_output/``.
        namespace, kubeconfig, prometheus_endpoint, scrape_duration,
        chaos_tests: the run inputs, echoed into the report.
        queries: sequence of at least four Prometheus query strings in the
            order cpu, cpu-limit, memory, memory-limit.
        analysis_data: three-element sequence (profiling, heatmap,
            recommendations) as produced by ``json_struct``.
    """
    data = {
        "inputs": [
            {
                "namespace": namespace,
                "kubeconfig": kubeconfig,
                "prometheus_endpoint": prometheus_endpoint,
                "scrape_duration": scrape_duration,
                "chaos_tests": chaos_tests
            }
        ],
        "analysis_start": [
            {
                "cpu_query": queries[0],
                "cpu_limit_query": queries[1],
                "memory_query": queries[2],
                "memory_limit_query": queries[3]
            }
        ],
        "profiling": analysis_data[0],
        "heatmap_analysis": analysis_data[1],
        "recommendations": analysis_data[2]
    }
    output_dir = "utils/chaos_recommender/recommender_output"
    # Create the output folder if missing, otherwise open() fails on a
    # fresh checkout with FileNotFoundError.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, file), "w") as json_file:
        json.dump(data, json_file, indent=4)


def main():
parser = argparse.ArgumentParser(description="Krkn Chaos Recommender Command-Line tool")
args = parse_arguments(parser)
Expand All @@ -81,7 +118,8 @@ def main():
auth_token,
scrape_duration,
chaos_tests,
log_level
log_level,
json_output
) = read_configuration(args.config_file)

if args.options:
Expand All @@ -91,9 +129,10 @@ def main():
scrape_duration = args.scrape_duration
log_level = args.log_level
prometheus_endpoint = args.prometheus_endpoint
json_output = args.json_output
chaos_tests = {"MEM": args.MEM, "GENERIC": args.GENERIC, "CPU": args.CPU, "NETWORK": args.NETWORK}

if log_level not in ["DEBUG","INFO", "WARNING", "ERROR","CRITICAL"]:
if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
logging.error(f"{log_level} not a valid log level")
sys.exit(1)

Expand All @@ -110,8 +149,15 @@ def main():
logging.info("Starting Analysis ...")
logging.info("Fetching the Telemetry data")

file_path = prometheus.fetch_utilization_from_prometheus(prometheus_endpoint, auth_token, namespace, scrape_duration)
analysis(file_path, chaos_tests)
file_path, queries = prometheus.fetch_utilization_from_prometheus(prometheus_endpoint, auth_token, namespace, scrape_duration)
analysis_data = analysis(file_path, chaos_tests, json_output)

if json_output is True:
time_str = time.strftime("%d%m%y_%H%M%S", time.localtime())
recommendation_json = f"recommender_{namespace}_{time_str}.json"
make_json_output(recommendation_json, namespace, kubeconfig, prometheus_endpoint, chaos_tests, scrape_duration, queries, analysis_data)
logging.info(f"Recommendation output is in file {recommendation_json} in chaos recommender output folder.")


if __name__ == "__main__":
main()

0 comments on commit c2896d0

Please sign in to comment.