initial commit

Signed-off-by: HermioneKT <[email protected]>
vhive-serverless · Apr 16, 2024 · 17dd759 · 17dd759
1 parent 37e5bf0
commit 17dd759
Show file tree

Hide file tree

Showing 12 changed files with 539 additions and 1 deletion.
diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -0,0 +1,18 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior.
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
diff --git a/.github/ISSUE_TEMPLATE/enhancement.md b/.github/ISSUE_TEMPLATE/enhancement.md
@@ -0,0 +1,11 @@
+---
+name: Enhancement
+about: Create a report to help us improve
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+**Describe the enhancement**
+A clear and concise description of what the enhancement is.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,17 @@
+# Use https://dependabot.com/docs/config-file/validator/ to check for errors.
+# Dependabot configuration: weekly update checks for Go modules and GitHub Actions.
+version: 2
+updates:
+  # Go module dependencies, resolved from the repository root.
+  - package-ecosystem: "gomod"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    # Skip patch-level version bumps for every dependency.
+    ignore:
+      - dependency-name: "*"
+        update-types: [ "version-update:semver-patch" ]
+
+  # Enable version updates for Actions
+  - package-ecosystem: "github-actions"
+    # Look for `.github/workflows` in the `root` directory
+    directory: "/"
+    schedule:
+      interval: "weekly"
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,17 @@
+## Summary
+
+A small summary of the requirements (in one/two sentences).
+
+## Implementation Notes :hammer_and_pick:
+
+* Briefly outline the overall technical solution. If necessary, identify talking points to which the reviewer's attention should be drawn.
+
+## External Dependencies :four_leaf_clover:
+
+* 
+
+## Breaking API Changes :warning:
+
+* 
+
+*Simply specify none (N/A) if not applicable.*
diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml
@@ -0,0 +1,46 @@
+# Lint workflow: spell check, commit-message lint and Markdown link check.
+name: Linters
+# Runs on pushes to main and on pull requests targeting main.
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  # Spell-checks the repository using the rules in configs/.spellcheck.yml.
+  build:
+    name: Spellcheck
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout@v4
+    - uses: rojopolis/[email protected]
+      name: Spellcheck
+      with:
+        config_path: configs/.spellcheck.yml
+  # Lints the most recent commit message (HEAD~1..HEAD) against
+  # configs/commitlint.config.js.
+  commitlint:
+    name: Commitlint
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Presumably fetches the full history so that HEAD~1 can be resolved.
+          fetch-depth: 0
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      - name: Install Commitlint
+        run: sudo npm install -g @commitlint/cli
+
+      # The command below is a single multi-line plain scalar, so it is
+      # executed as one commitlint invocation.
+      - name: Lint commits
+        run: commitlint 
+          --config ${{ github.workspace }}/configs/commitlint.config.js 
+          --help-url 'https://stackoverflow.com/a/45974435' 
+          --from HEAD~1 --to HEAD 
+          --verbose
+  # Checks links in Markdown files using configs/.linkcheck.json.
+  markdown-link-check:
+    name: LinkCheck
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: gaurav-nelson/github-action-markdown-link-check@v1
+        with:
+          use-quiet-mode: 'yes'
+          config-file: 'configs/.linkcheck.json'
diff --git a/README.md b/README.md
@@ -1 +1,83 @@
-# power_manager
+# K8s Power Manager Experiments
+
+## Setup 
+1. Set up a knative cluster (refer to vHive)
+2. 4 vSwarm benchmarks are used to run the experiments. On master node, deploy these benchmarks on the Knative cluster.
+    ```bash
+    git clone --depth=1 https://github.com/vhive-serverless/vSwarm.git
+
+    cd $HOME/vSwarm/tools/test-client && go build ./test-client.go
+
+    kn service apply -f $HOME/vSwarm/benchmarks/auth/yamls/knative/kn-auth-python.yaml
+    kn service apply -f $HOME/vSwarm/benchmarks/aes/yamls/knative/kn-aes-python.yaml
+    kn service apply -f $HOME/vSwarm/benchmarks/sleeping/yamls/knative/kn-sleeping-go.yaml
+    kn service apply -f $HOME/vSwarm/benchmarks/spinning/yamls/knative/kn-spinning-go.yaml
+    ```
+3. Change the global node-name variables in power_manager/util.go to match the actual names of your nodes.
+
+### Experiment 1: Workload sensitivity 
+This experiment aims to confirm that sensitivity to CPU frequency varies across workload types: CPU-bound workloads are more sensitive than I/O-bound workloads, because I/O-bound workloads are primarily limited by factors such as disk and network speed rather than CPU processing speed. A 2-node Knative cluster is needed for this experiment.
+
+1. On master node, run the node setup script:
+    ```bash
+    ./scripts/power_manager/setup_power_manager.sh;
+    ```
+   Then run the experiment:
+    ```bash
+    go run $HOME/vhive/examples/power_manager/workload_sensitivity/main.go;
+    ```
+
+### Experiment 2: Internode scaling
+3 node cluster is needed. 3 scenarios are performed:
+- Scenario 1: All worker nodes have low frequency 
+- Scenario 2: All worker nodes have high frequency
+- Scenario 3: 1 worker node has high frequency and the other has low frequency (the frequencies must be set manually, as in steps 4 and 5 of Experiment 3 below)
+
+This experiment aims to confirm that running all worker nodes at low frequency results in low power consumption but comes with the drawback of high latency. Conversely, running all worker nodes at high frequency maximizes performance by significantly reducing latency, but does so at the cost of high power consumption. A 50/50 mix of frequencies strikes a
+balance, offering medium power consumption with the benefit of low latency.
+
+1. On master node, run the node setup script:
+    ```bash
+    ./scripts/power_manager/setup_power_manager.sh;
+    ```
+   Then run the experiment:
+    ```bash
+    go run $HOME/vhive/examples/power_manager/internode_scaling/main.go;
+    ```
+
+### Experiment 3: Class Assignment 
+3 node cluster is needed with 1 master node, 1 high frequency worker node and 1 low frequency worker node (manually set up as experiment 2 scenario 3).
+
+This experiment aims to confirm that automatically assigning workloads based on their sensitivity to CPU frequency leads to improved performance and optimized power consumption. Sensitive workloads, i.e. those with more than a 40% latency difference between the 5th and 90th percentiles (such as Spinning), are automatically assigned to the high-frequency node and should experience lower latency, while less sensitive workloads (such as Sleeping) can be handled efficiently by the low-frequency node, conserving energy without significantly impacting performance.
+
+1. Thus on master node, we need to enable nodeSelector:
+```bash
+   kubectl patch configmap config-features -n knative-serving -p '{"data": {"kubernetes.podspec-nodeselector": "enabled"}}'
+```
+
+2. On master node, label the worker nodes:
+    ```bash
+    kubectl label node node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us loader-nodetype=worker-low
+    kubectl label node node-2.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us loader-nodetype=worker-high 
+    ```
+    Run the node setup script:
+    ```bash
+    ./scripts/power_manager/setup_power_manager.sh;
+    ```
+4. On worker node 1, manually set the frequency of every CPU core to 1.2GHz, i.e. run the commands below for each CPU core (replace cpu0 with the core's index):
+    ```bash
+    echo performance | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
+    echo 1200000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
+    echo 1200000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+    ```
+5. On worker node 2, manually set the frequency of every CPU core to 2.4GHz, i.e. run the commands below for each CPU core (replace cpu0 with the core's index):
+    ```bash
+    echo performance | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
+    echo 2400000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
+    echo 2400000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+    ```
+
+6. Run the experiment:
+    ```bash
+    go run $HOME/vhive/examples/power_manager/assignment/main.go;
+    ```
diff --git a/assignment/main.go b/assignment/main.go
@@ -0,0 +1,122 @@
+// Command assignment runs the class-assignment experiment: it measures
+// per-service latencies and pins each service to a node class with kubectl.
+//
+// The file defines func main and is started with `go run`, which only works
+// for a package named main.
+package main
+
+import (
+	"encoding/csv"
+	"fmt"
+	"os"
+	"os/exec"
+	"sync"
+	"time"
+
+	powermanager_util "github.com/vhive-serverless/power_manager"
+)
+
+var (
+	// serviceAssignment records, per Knative service name, whether the service
+	// has already been pinned to a node class. An entry is set to true only
+	// after a successful kubectl patch, so a service is patched at most once.
+	serviceAssignment = map[string]bool{
+		"spinning-go": false,
+		"sleeping-go": false,
+		"aes-python":  false,
+		"auth-python": false,
+	}
+	// serviceAssignmentMu guards serviceAssignment. processLatencies runs in
+	// one assignWorkload goroutine per service, and unsynchronised map access
+	// from several goroutines is a data race.
+	serviceAssignmentMu sync.Mutex
+)
+
+// isAssigned reports whether serviceName has already been pinned to a node class.
+func isAssigned(serviceName string) bool {
+	serviceAssignmentMu.Lock()
+	defer serviceAssignmentMu.Unlock()
+	return serviceAssignment[serviceName]
+}
+
+// markAssigned records that serviceName has been pinned to a node class.
+func markAssigned(serviceName string) {
+	serviceAssignmentMu.Lock()
+	defer serviceAssignmentMu.Unlock()
+	serviceAssignment[serviceName] = true
+}
+
+// patchNodeSelector sets the loader-nodetype nodeSelector of the Knative
+// service serviceName (namespace default) to nodeType via `kubectl patch`.
+// kubectl is executed directly rather than through a shell, so no quoting of
+// the JSON patch or the service name is needed. On failure the returned error
+// carries kubectl's combined output.
+func patchNodeSelector(serviceName, nodeType string) error {
+	patch := fmt.Sprintf(`{"spec":{"template":{"spec":{"nodeSelector":{"loader-nodetype":%q}}}}}`, nodeType)
+	out, err := exec.Command("kubectl", "patch", "service.serving.knative.dev", serviceName,
+		"--type", "merge", "--patch", patch, "--namespace", "default").CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("kubectl patch %s: %w: %s", serviceName, err, out)
+	}
+	return nil
+}
+
+// processLatencies decides, from the latency samples in records, whether
+// serviceName should be pinned to a node class, and applies the decision.
+//
+// The relative spread (p90 - p5) / p5 is computed from the 5th and 90th
+// percentiles of records. A spread of at least 0.40 assigns the service to
+// the "worker-high" nodes; a spread below 0.10 assigns it to the "worker-low"
+// nodes; anything in between leaves the service unassigned. A service that is
+// already assigned is never patched again. Errors are reported on stdout and
+// leave the service unassigned so that a later call can retry.
+func processLatencies(records []int64, serviceName string) {
+	const (
+		highSensitivityThreshold = 0.40
+		lowSensitivityThreshold  = 0.10
+	)
+
+	if len(records) == 0 {
+		fmt.Println("No data to process")
+		return
+	}
+	if isAssigned(serviceName) {
+		return
+	}
+
+	fifthPercentile := powermanager_util.GetDataAtPercentile(records, 5)
+	ninetiethPercentile := powermanager_util.GetDataAtPercentile(records, 90)
+	if fifthPercentile <= 0 {
+		// A non-positive baseline would make the ratio +Inf or NaN and
+		// produce a meaningless assignment; wait for better samples.
+		fmt.Println("Skipping assignment: 5th percentile latency is not positive")
+		return
+	}
+	difference := float64(ninetiethPercentile-fifthPercentile) / float64(fifthPercentile)
+
+	var class, nodeType string
+	switch {
+	case difference >= highSensitivityThreshold:
+		class, nodeType = "high", "worker-high"
+	case difference < lowSensitivityThreshold:
+		class, nodeType = "low", "worker-low"
+	default:
+		return
+	}
+
+	fmt.Printf("Assigning to %s performance class\n", class)
+	if err := patchNodeSelector(serviceName, nodeType); err != nil {
+		fmt.Printf("Error assigning to %s performance class: %+v\n", class, err)
+		return
+	}
+	markAssigned(serviceName)
+}
+
+// assignWorkload accumulates the latency samples received on ch_latency and
+// hands the full set collected so far to processLatencies once per minute.
+// When ch_latency is closed it processes the accumulated samples one last
+// time and returns. wg.Done is called on return.
+func assignWorkload(ch_latency <-chan int64, serviceName string, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	tick := time.NewTicker(time.Minute)
+	defer tick.Stop()
+
+	var samples []int64
+	for {
+		select {
+		case <-tick.C:
+			// Periodic evaluation of everything gathered so far.
+			processLatencies(samples, serviceName)
+		case sample, open := <-ch_latency:
+			if open {
+				samples = append(samples, sample)
+				continue
+			}
+			// Producer is finished: evaluate the remaining data and stop.
+			processLatencies(samples, serviceName)
+			return
+		}
+	}
+}
+
+// main drives the class-assignment experiment.
+//
+// It creates metrics2.csv and writes a header row, then starts one CSV-writer
+// goroutine and one assignWorkload goroutine each for the spinning and
+// sleeping services. For five minutes it launches, once per second, one
+// InvokeConcurrently call against the sleeping URL and one against the
+// spinning URL. After all launched invocations have returned it closes the
+// channels, waits for the consumers to drain them and appends a final row of
+// dashes to the CSV file.
+func main() {
+	file, err := os.Create("metrics2.csv")
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	writer := csv.NewWriter(file)
+	// Deferred after file.Close, so it runs first: flush buffered rows and
+	// surface any write error the csv.Writer has recorded.
+	defer func() {
+		writer.Flush()
+		if err := writer.Error(); err != nil {
+			fmt.Printf("Error flushing metrics to the CSV file: %v\n", err)
+		}
+	}()
+
+	err = writer.Write([]string{"startTime", "endTime", "spinningLatency", "sleepingLatency"})
+	if err != nil {
+		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
+	}
+
+	records := make(chan []string)
+	spinningLatencies := make(chan int64)
+	sleepingLatencies := make(chan int64)
+
+	var wg sync.WaitGroup
+	wg.Add(3)
+	go powermanager_util.WriteToCSV(writer, records, &wg)
+	go assignWorkload(spinningLatencies, "spinning-go", &wg)
+	go assignWorkload(sleepingLatencies, "sleeping-go", &wg)
+
+	// invocations tracks the producer goroutines so that the channels are not
+	// closed while a producer may still send on them (which would panic).
+	// NOTE(review): this assumes InvokeConcurrently returns only after it has
+	// finished sending on the channels - confirm against its implementation.
+	var invocations sync.WaitGroup
+
+	now := time.Now()
+	for time.Since(now) < (time.Minute * 5) {
+		invocations.Add(2)
+		go func() {
+			defer invocations.Done()
+			powermanager_util.InvokeConcurrently(5, powermanager_util.SleepingURL, records, spinningLatencies, sleepingLatencies, false)
+		}()
+		go func() {
+			defer invocations.Done()
+			powermanager_util.InvokeConcurrently(5, powermanager_util.SpinningURL, records, spinningLatencies, sleepingLatencies, true)
+		}()
+
+		time.Sleep(1 * time.Second) // Wait for 1 second before invoking again
+	}
+	invocations.Wait()
+
+	close(records)
+	close(spinningLatencies)
+	close(sleepingLatencies)
+	wg.Wait()
+
+	// The CSV-writer goroutine has exited, so writing from here is safe.
+	err = writer.Write([]string{"-", "-", "-", "-"})
+	if err != nil {
+		fmt.Printf("Error writing metrics to the CSV file: %v\n", err)
+	}
+	fmt.Println("done")
+}
diff --git a/go.mod b/go.mod
@@ -0,0 +1,5 @@
+module github.com/vhive-serverless/power_manager
+
+
+go 1.21
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,5 @@
		module github.com/vhive-serverless/power_manager


		go 1.21