Updated doc and bulk scale test scripts
Signed-off-by: Chandrakala Subramanyam <[email protected]>
chandrams committed Dec 10, 2024
1 parent 3053305 commit 4fd462c
Showing 4 changed files with 418 additions and 41 deletions.
@@ -44,14 +44,13 @@ def setup_logger(name, log_file, level=logging.INFO):
def invoke_bulk_with_time_range_labels(worker_number, resultsdir, bulk_json, delay):
    try:
        #time.sleep(delay)
        print("In bulk")
        scale_log_dir = resultsdir + "/scale_logs"
        os.makedirs(scale_log_dir, exist_ok=True)

        org_id = bulk_json['filter']['include']['labels']['org_id']
        cluster_id = bulk_json['filter']['include']['labels']['cluster_id']

-       log_id = "worker_" + str(worker_number) + "-" + org_id + "-" + cluster_id
+       log_id = str(worker_number) + "-" + org_id + "-" + cluster_id

        print(log_id)

@@ -156,22 +155,17 @@ def invoke_bulk_with_time_range_labels(worker_number, resultsdir, bulk_json, delay):
    except Exception as e:
        return {'error': str(e)}

-def parallel_requests_with_labels(max_workers, resultsdir, initial_end_time, interval_hours, days_of_res, test, interval_seconds):
+def parallel_requests_with_labels(max_workers, resultsdir, initial_end_time, interval_hours, days_of_res, org_ids, cluster_ids, interval_seconds):
    results = []
    # To do: For every 6 hrs time range (starting from end_time until 15 days), trigger parallel requests with same timerange

    print(f"initial_end_time - {initial_end_time}")
    print(f"days_of_res - {days_of_res}")
    print(f"interval_hours - {interval_hours}")

    num_tsdb_blocks = int((days_of_res * 24) / interval_hours)

    print(f"num_tsdb_blocks - {num_tsdb_blocks}")

-   # 100k exps
-   # org * clusters * namespaces * workloads
-   # 100k - 10 * 10 * 20 * 50
-   org_ids=10
-   cluster_ids=10

    current_end_time = initial_end_time

    for k in range(1, num_tsdb_blocks):
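        # note: range(1, num_tsdb_blocks) runs num_tsdb_blocks - 1 iterations, i.e. blocks 1 through num_tsdb_blocks - 1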
@@ -229,38 +223,43 @@
max_workers = 1
days_of_res = 1
results_dir = "."
-initial_end_date = "2024-12-06T18:00:00.001Z"
+initial_end_date = "2024-12-10T11:50:00.001Z"
interval_hours = 6
test = ""
+org_ids = 10
+cluster_ids = 10
rampup_interval_seconds = 2

parser = argparse.ArgumentParser()

# add the named arguments
parser.add_argument('--test', type=str, help='specify the test to be run')
parser.add_argument('--workers', type=str, help='specify the number of workers')
-parser.add_argument('--startdate', type=str, help='Specify start date and time in "%Y-%m-%dT%H:%M:%S.%fZ" format.')
+parser.add_argument('--enddate', type=str, help='Specify end date and time of the tsdb block in "%Y-%m-%dT%H:%M:%S.%fZ" format.')
parser.add_argument('--interval', type=str, help='specify the interval hours')
parser.add_argument('--resultsdir', type=str, help='specify the results dir')
+parser.add_argument('--org_ids', type=str, help='specify the no. of orgs')
+parser.add_argument('--cluster_ids', type=str, help='specify the no. of clusters / org')

# parse the arguments from the command line
args = parser.parse_args()

if args.test:
    test = args.test

if args.workers:
    max_workers = int(args.workers)

-if args.startdate:
-    initial_end_date = args.startdate
+if args.enddate:
+    initial_end_date = args.enddate

if args.interval:
    interval_hours = int(args.interval)

if args.resultsdir:
    results_dir = args.resultsdir
+if args.org_ids:
+    org_ids = int(args.org_ids)
+
+if args.cluster_ids:
+    cluster_ids = int(args.cluster_ids)

form_kruize_url(cluster_type)

# Create the metric profile
@@ -273,18 +272,9 @@
datasource_name = None
list_response = list_datasources(datasource_name)

-# Import datasource metadata
-# input_json_file = "../json_files/thanos_import_metadata.json"
-# meta_response = import_metadata(input_json_file)
-# metadata_json = meta_response.json()
-# print(metadata_json)
-# if meta_response.status_code != 201:
-#     print("Importing metadata from the datasource failed!")
-#     sys.exit(1)

start_time = time.time()
print(f"initial_end_date to parallel requests - {initial_end_date}")
-responses = parallel_requests_with_labels(max_workers, results_dir, initial_end_date, interval_hours, days_of_res, test, rampup_interval_seconds)
+responses = parallel_requests_with_labels(max_workers, results_dir, initial_end_date, interval_hours, days_of_res, org_ids, cluster_ids, rampup_interval_seconds)

# Print the results
for i, response in enumerate(responses):
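Taken together, the Python changes replace the hard-coded org/cluster counts with the new --org_ids and --cluster_ids arguments and rename --startdate to --enddate to match what the value actually is (the end of the newest tsdb block). The sketch below is a minimal illustration of the resulting fan-out pattern, assuming a hypothetical post_bulk helper and illustrative label values in place of the script's real request code:

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta

def post_bulk(bulk_json):
    # Hypothetical stand-in for the real bulk POST to Kruize; just echoes the labels it would submit.
    return bulk_json["filter"]["include"]["labels"]

def fan_out(initial_end_time, interval_hours, days_of_res, org_ids, cluster_ids, max_workers):
    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
    end = datetime.strptime(initial_end_time, fmt)
    num_tsdb_blocks = int((days_of_res * 24) / interval_hours)
    futures = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Walk backwards from the initial end time, one interval_hours-sized block at a time.
        for _ in range(num_tsdb_blocks):
            start = end - timedelta(hours=interval_hours)
            # One bulk request per org/cluster label pair, all sharing the same time range.
            for org in range(org_ids):
                for cluster in range(cluster_ids):
                    bulk_json = {
                        "filter": {"include": {"labels": {
                            "org_id": f"org-{org}", "cluster_id": f"cluster-{cluster}"}}},
                        "time_range": {"start": start.strftime(fmt), "end": end.strftime(fmt)},
                    }
                    futures.append(executor.submit(post_bulk, bulk_json))
            end = start
    return [f.result() for f in futures]

# e.g., fan_out("2024-12-10T11:50:00.001Z", 6, 1, 10, 10, 5) issues 4 * 10 * 10 = 400 requests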
@@ -31,23 +31,25 @@ CLUSTER_TYPE=openshift
NAMESPACE=openshift-tuning
num_workers=5
interval_hours=6
-initial_start_date="2024-12-07T00:00:00.000Z"
+initial_end_date="2024-12-10T11:50:00.000Z"
+org_ids=10
+cluster_ids=10

skip_setup=0
prometheus_ds=0
replicas=3
test="time_range"

ds_url="http://thanos-query-frontend.thanos-bench.svc.cluster.local:9090/"

target="crc"
KRUIZE_IMAGE="quay.io/kruize/autotune:mvp_demo"

function usage() {
    echo
-   echo "Usage: [-i Kruize image] [-w No. of workers (default - 5)] [-t interval hours (default - 2)] [-s Initial start date (default - 2024-11-11T00:00:00.000Z)]"
+   echo "Usage: [-i Kruize image] [-w No. of workers (default - 5)] [-t interval hours (default - 2)] [-s Initial end date of tsdb block (default - 2024-11-11T00:00:00.000Z)]"
    echo "[-a kruize replicas (default - 3)][-r <resultsdir path>] [--skipsetup skip kruize setup] [ -z to test with prometheus datasource]"
-   echo "[--test Specify the test to be run (default - time_range)] [--url Datasource url (default - ${ds_url}]"
-   exit -1
+   echo "[--url Datasource url (default - ${ds_url})] [-o No. of orgs (default - 10)] [-c No. of clusters / org (default - 10)]"
+   exit 1
}

function get_kruize_pod_log() {
@@ -95,15 +97,12 @@ function kruize_local_thanos_patch() {
}


-while getopts r:i:w:s:t:a:zh:-: gopts
+while getopts r:i:w:s:t:a:o:c:zh:-: gopts
do
    case ${gopts} in
    -)
        case "${OPTARG}" in
-       test=*)
-           test=${OPTARG#*=}
-           ;;
-       url=*)
+       url=*)
            ds_url=${OPTARG#*=}
            ;;
        skipsetup)
@@ -125,14 +124,20 @@ do
        num_workers="${OPTARG}"
        ;;
    s)
-       initial_start_date="${OPTARG}"
+       initial_end_date="${OPTARG}"
        ;;
    t)
        interval_hours="${OPTARG}"
        ;;
    a)
        replicas="${OPTARG}"
        ;;
+   o)
+       org_ids="${OPTARG}"
+       ;;
+   c)
+       cluster_ids="${OPTARG}"
+       ;;
    z)
        prometheus_ds=1
        ;;
@@ -207,7 +212,7 @@ export PYTHONUNBUFFERED=1
echo ""
echo "Running scale test for kruize on ${CLUSTER_TYPE}" | tee -a ${LOG}
echo ""
python3 bulk_scale_test.py --test ${test} --workers ${num_workers} --startdate ${initial_start_date} --interval ${interval_hours} --resultsdir ${LOG_DIR} | tee -a ${LOG}
python3 bulk_scale_test.py --org_ids ${org_ids} --cluster_ids ${cluster_ids} --workers ${num_workers} --enddate ${initial_end_date} --interval ${interval_hours} --resultsdir ${LOG_DIR} | tee -a ${LOG}
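# For reference, a hypothetical end-to-end invocation of this wrapper (script name assumed; flags match the getopts above):
# ./bulk_scale_test.sh -i quay.io/kruize/autotune:mvp_demo -w 5 -t 6 -o 10 -c 10 -s "2024-12-10T11:50:00.000Z" -r ./results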

end_time=$(get_date)
elapsed_time=$(time_diff "${start_time}" "${end_time}")