From 17dd7595adc4ca9729105c53a880af5eeb3c8fcc Mon Sep 17 00:00:00 2001 From: HermioneKT Date: Tue, 16 Apr 2024 12:52:22 +0800 Subject: [PATCH] initial commit Signed-off-by: HermioneKT --- .github/ISSUE_TEMPLATE/bug-report.md | 18 ++++ .github/ISSUE_TEMPLATE/enhancement.md | 11 ++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++ .github/dependabot.yml | 17 +++ .github/pull_request_template.md | 17 +++ .github/workflows/linters.yml | 46 ++++++++ README.md | 84 ++++++++++++++- assignment/main.go | 122 ++++++++++++++++++++++ go.mod | 5 + internode_scaling/main.go | 70 +++++++++++++ utils.go | 78 ++++++++++++++ workload_sensitivity/main.go | 52 +++++++++ 12 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/enhancement.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/dependabot.yml create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/linters.yml create mode 100644 assignment/main.go create mode 100644 go.mod create mode 100644 internode_scaling/main.go create mode 100644 utils.go create mode 100644 workload_sensitivity/main.go diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..0a97511 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,18 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior. + +**Expected behavior** +A clear and concise description of what you expected to happen. 
+ diff --git a/.github/ISSUE_TEMPLATE/enhancement.md b/.github/ISSUE_TEMPLATE/enhancement.md new file mode 100644 index 0000000..2e06ff4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement.md @@ -0,0 +1,11 @@ +--- +name: Enhancement +about: Create a report to help us improve +title: '' +labels: enhancement +assignees: '' + +--- + +**Describe the enhancement** +A clear and concise description of what the enhancement is. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..19e9d15 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +# Use https://dependabot.com/docs/config-file/validator/ to check for errors. 
+version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + ignore: + - dependency-name: "*" + update-types: [ "version-update:semver-patch" ] + + # Enable version updates for Actions + - package-ecosystem: "github-actions" + # Look for `.github/workflows` in the `root` directory + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..faca125 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,17 @@ +## Summary + +A small summary of the requirements (in one/two sentences). + +## Implementation Notes :hammer_and_pick: + +* Briefly outline the overall technical solution. If necessary, identify talking points where the reviewer's attention should be drawn to. + +## External Dependencies :four_leaf_clover: + +* + +## Breaking API Changes :warning: + +* + +*Simply specify none (N/A) if not applicable.* diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml new file mode 100644 index 0000000..c246e2d --- /dev/null +++ b/.github/workflows/linters.yml @@ -0,0 +1,46 @@ +name: Linters +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + name: Spellcheck + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: rojopolis/spellcheck-github-actions@0.36.0 + name: Spellcheck + with: + config_path: configs/.spellcheck.yml + commitlint: + name: Commitlint + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Commitlint + run: sudo npm install -g @commitlint/cli + + - name: Lint commits + run: commitlint + --config ${{ github.workspace }}/configs/commitlint.config.js + --help-url 'https://stackoverflow.com/a/45974435' + --from HEAD~1 --to HEAD + --verbose + markdown-link-check: + name: LinkCheck + runs-on: ubuntu-20.04 + steps: + - 
uses: actions/checkout@v4 + - uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + use-quiet-mode: 'yes' + config-file: 'configs/.linkcheck.json' diff --git a/README.md b/README.md index 680960c..4154216 100644 --- a/README.md +++ b/README.md @@ -1 +1,83 @@ -# power_manager \ No newline at end of file +# K8s Power Manager Experiments + +## Setup +1. Set up a Knative cluster (refer to vHive) +2. 4 vSwarm benchmarks are used to run the experiments. On the master node, deploy these benchmarks on the Knative cluster. + ```bash + git clone --depth=1 https://github.com/vhive-serverless/vSwarm.git + + cd $HOME/vSwarm/tools/test-client && go build ./test-client.go + + kn service apply -f $HOME/vSwarm/benchmarks/auth/yamls/knative/kn-auth-python.yaml + kn service apply -f $HOME/vSwarm/benchmarks/aes/yamls/knative/kn-aes-python.yaml + kn service apply -f $HOME/vSwarm/benchmarks/sleeping/yamls/knative/kn-sleeping-go.yaml + kn service apply -f $HOME/vSwarm/benchmarks/spinning/yamls/knative/kn-spinning-go.yaml + ``` +3. Change the global node name variables in power_manager/utils.go to match the actual names of your nodes. + +### Experiment 1: Workload sensitivity +This experiment is to confirm that workload sensitivity to CPU frequency varies for different types of workloads, with CPU-bound workloads showing greater sensitivity than I/O-bound workloads, as I/O-bound workloads are primarily limited by factors such as disk and network speed rather than CPU processing speed. A 2-node Knative cluster is needed for this experiment. + +1. On the master node, run the node setup script: + ```bash + ./scripts/power_manager/setup_power_manager.sh; + ``` + Then run the experiment: + ```bash + go run $HOME/vhive/examples/power_manager/workload_sensitivity/main.go; + ``` + +### Experiment 2: Internode scaling +A 3-node cluster is needed. 
3 scenarios are performed: +- Scenario 1: All worker nodes have low frequency +- Scenario 2: All worker nodes have high frequency +- Scenario 3: 1 worker node has high frequency, another with low frequency (need to manually tune like experiment 3 point 4&5 below) + +This experiment is to confirm that using all low-frequency combinations results in low power consumption but comes with the drawback of high latency. Conversely, opting for all high-frequency combinations maximizes performance by significantly reducing latency but it does so at the cost of high-power consumption. A 50/50 mix of frequencies strikes a +balance, offering medium power consumption with the benefit of low latency. + +1. On master node, run the node setup script: + ```bash + ./scripts/power_manager/setup_power_manager.sh; + ``` + Then run the experiment: + ```bash + go run $HOME/vhive/examples/power_manager/internode_scaling/main.go; + ``` + +### Experiment 3: Class Assignment +3 node cluster is needed with 1 master node, 1 high frequency worker node and 1 low frequency worker node (manually set up as experiment 2 scenario 3). + +This experiment is to confirm that the automatic assignment of workloads based on their workload sensitivity will lead to improved performance and optimized power consumption. Sensitive workloads with more than a 40% latency difference between 5th and 90th percentiles (such as Spinning) will be automatically assigned to high frequency node should experience lower latency, while less sensitive workloads (such as Sleeping) can be efficiently handled by low-frequency nodes, conserving energy without significantly impacting performance. + +1. Thus on master node, we need to enable nodeSelector: +```bash + kubectl patch configmap config-features -n knative-serving -p '{"data": {"kubernetes.podspec-nodeselector": "enabled"}}' +``` + +2. 
On the master node, label the worker nodes: + ```bash + kubectl label node node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us loader-nodetype=worker-low + kubectl label node node-2.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us loader-nodetype=worker-high + ``` + Run the node setup script: + ```bash + ./scripts/power_manager/setup_power_manager.sh; + ``` +4. On worker node 1, manually set the frequency of every CPU core to 1.2GHz, i.e. run the commands below for each CPU core (cpu0, cpu1, ...): + ```bash + echo performance | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor + echo 1200000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq + echo 1200000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq + ``` +5. On worker node 2, manually set the frequency of every CPU core to 2.4GHz in the same way: + ```bash + echo performance | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor + echo 2400000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq + echo 2400000 | sudo tee /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq + ``` + +6. 
Run the experiment: + ```bash + go run $HOME/vhive/examples/power_manager/assignment/main.go; + ``` diff --git a/assignment/main.go b/assignment/main.go new file mode 100644 index 0000000..83c1c88 --- /dev/null +++ b/assignment/main.go @@ -0,0 +1,122 @@ +package powermanager + +import ( + "encoding/csv" + "fmt" + "os" + "os/exec" + "sync" + "time" + + powermanager_util "github.com/vhive-serverless/power_manager" +) + +var ( + serviceAssignment = map[string]bool{ + "spinning-go": false, + "sleeping-go": false, + "aes-python": false, + "auth-python": false, + } +) + +func processLatencies(records []int64, serviceName string) { + if len(records) == 0 { + fmt.Println("No data to process") + return + } + + fifthPercentile := powermanager_util.GetDataAtPercentile(records, 5) + ninetiethPercentile := powermanager_util.GetDataAtPercentile(records, 90) + difference := float64(ninetiethPercentile-fifthPercentile) / float64(fifthPercentile) + if difference >= 0.40 && !serviceAssignment[serviceName] { // Assign to high performance class + fmt.Println("Assigning to high performance class") + command := fmt.Sprintf("kubectl patch service.serving.knative.dev %s --type merge --patch '{\"spec\":{\"template\":{\"spec\":{\"nodeSelector\":{\"loader-nodetype\":\"worker-high\"}}}}}' --namespace default", serviceName) + cmd := exec.Command("bash", "-c", command) + _, err := cmd.CombinedOutput() + if err != nil { + fmt.Printf(fmt.Sprintf("Error assigning to high performance class: %+v", err)) + return + } + serviceAssignment[serviceName] = true + } + if difference < 0.10 && !serviceAssignment[serviceName] { // Assign to low performance class + fmt.Println("Assigning to low performance class") + command := fmt.Sprintf("kubectl patch service.serving.knative.dev %s --type merge --patch '{\"spec\":{\"template\":{\"spec\":{\"nodeSelector\":{\"loader-nodetype\":\"worker-low\"}}}}}' --namespace default", serviceName) + cmd := exec.Command("bash", "-c", command) + _, err := cmd.CombinedOutput() + 
if err != nil { + fmt.Printf(fmt.Sprintf("Error assigning to low performance class: %+v", err)) + return + } + serviceAssignment[serviceName] = true + } +} + +func assignWorkload(ch_latency <-chan int64, serviceName string, wg *sync.WaitGroup) { + defer wg.Done() + + ticker := time.NewTicker(1 * time.Minute) + defer ticker.Stop() + + var records []int64 + + for { + select { + case record, ok := <-ch_latency: + if !ok { + // Channel is closed, process remaining data + processLatencies(records, serviceName) + return + } + records = append(records, record) + case <-ticker.C: + // Time to process the data + processLatencies(records, serviceName) + } + } +} + +func main() { + file, err := os.Create("metrics2.csv") + if err != nil { + panic(err) + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + err = writer.Write(append([]string{"startTime", "endTime", "spinningLatency", "sleepingLatency"})) + if err != nil { + fmt.Printf("Error writing metrics to the CSV file: %v\n", err) + } + + ch := make(chan []string) + ch_latency_spinning := make(chan int64) + ch_latency_sleeping := make(chan int64) + + var wg sync.WaitGroup + wg.Add(3) + go powermanager_util.WriteToCSV(writer, ch, &wg) + go assignWorkload(ch_latency_spinning, "spinning-go", &wg) + go assignWorkload(ch_latency_sleeping, "sleeping-go", &wg) + + now := time.Now() + for time.Since(now) < (time.Minute * 5) { + go powermanager_util.InvokeConcurrently(5, powermanager_util.SleepingURL, ch, ch_latency_spinning, ch_latency_sleeping, false) + go powermanager_util.InvokeConcurrently(5, powermanager_util.SpinningURL, ch, ch_latency_spinning, ch_latency_sleeping, true) + + time.Sleep(1 * time.Second) // Wait for 1 second before invoking again + } + close(ch) + close(ch_latency_spinning) + close(ch_latency_sleeping) + wg.Wait() + + err = writer.Write(append([]string{"-", "-", "-", "-"})) + if err != nil { + fmt.Printf("Error writing metrics to the CSV file: %v\n", err) + } + 
fmt.Println("done") +} \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..7f7214f --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/vhive-serverless/power_manager + + +go 1.21 + diff --git a/internode_scaling/main.go b/internode_scaling/main.go new file mode 100644 index 0000000..ab90372 --- /dev/null +++ b/internode_scaling/main.go @@ -0,0 +1,70 @@ +package main + +import ( + "encoding/csv" + "fmt" + "os" + "sync" + "time" + + powermanager_util "github.com/vhive-serverless/power_manager" + powermanager "github.com/vhive-serverless/vhive/power_manager" +) + +func main() { + file, err := os.Create("metrics1.csv") + if err != nil { + panic(err) + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + err = writer.Write(append([]string{"startTime", "endTime", "spinningLatency", "sleepingLatency"})) + if err != nil { + fmt.Printf("Error writing metrics to the CSV file: %v\n", err) + } + + ch := make(chan []string) + ch_latency_spinning := make(chan int64) + ch_latency_sleeping := make(chan int64) + + var wg sync.WaitGroup + wg.Add(3) + go powermanager_util.WriteToCSV(writer, ch, &wg) + + frequencies := map[string]int64{ + powermanager_util.LowFrequencyPowerProfile: 1200, + powermanager_util.HighFrequencyPowerProfile: 2400, + } // for 50/50, need to manually tune the frequency of the individual node + + for powerProfile, freq := range frequencies { + err := powermanager.SetPowerProfileToNode(powerProfile, powermanager_util.Node1Name, freq) + if err != nil { + fmt.Printf(fmt.Sprintf("Error setting up power profile for node1: %+v", err)) + } + err = powermanager.SetPowerProfileToNode(powerProfile, powermanager_util.Node2Name, freq) + if err != nil { + fmt.Printf(fmt.Sprintf("Error setting up power profile for node2: %+v", err)) + } + + now := time.Now() + for time.Since(now) < (time.Minute * 5) { + go powermanager_util.InvokeConcurrently(5, powermanager_util.SleepingURL, ch, 
// Benchmark endpoints, worker node names and power profile names shared by the
// experiments.
var (
	SpinningURL               = "spinning-go.default.192.168.1.240.sslip.io"
	SleepingURL               = "sleeping-go.default.192.168.1.240.sslip.io"
	AesURL                    = "aes-python.default.192.168.1.240.sslip.io"
	AuthURL                   = "auth-python.default.192.168.1.240.sslip.io"
	Node1Name                 = "node-1.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us" // to be replaced by your node name
	Node2Name                 = "node-2.kt-cluster.ntu-cloud-pg0.utah.cloudlab.us" // to be replaced by your node name
	HighFrequencyPowerProfile = "performance"
	LowFrequencyPowerProfile  = "shared"
)

// Invoke runs the vSwarm test-client once against url on port 80 and measures
// the wall-clock time of the whole command.
//
// It returns the start time, the end time (both Unix milliseconds, UTC) and
// the latency end-start in milliseconds. If the command fails, all three
// values are 0 and the error wraps the command error together with the
// command's combined output.
func Invoke(url string) (int64, int64, int64, error) {
	// bash is needed for the $HOME expansion and the `cd ... &&` chain.
	command := fmt.Sprintf("cd $HOME/vSwarm/tools/test-client && ./test-client --addr %s:80 --name \"allow\"", url)
	startInvoke := time.Now().UTC().UnixMilli()
	out, err := exec.Command("bash", "-c", command).CombinedOutput()
	if err != nil {
		return 0, 0, 0, fmt.Errorf("invoking %s: %w (output: %q)", url, err, out)
	}
	endInvoke := time.Now().UTC().UnixMilli()
	return startInvoke, endInvoke, endInvoke - startInvoke, nil
}

// InvokeConcurrently performs n invocations of url in parallel. For every
// successful invocation it sends the latency to ch_latency_spinning (when
// spinning is true) or ch_latency_sleeping (otherwise), followed by a CSV
// record {start, end, spinningLatency, sleepingLatency} on ch, with "-" in the
// column of the other benchmark.
//
// Failed invocations are logged and produce neither a latency sample nor a
// record, so they cannot show up as zero-millisecond measurements.
//
// The call returns only after all n invocations have finished and delivered
// their results. Run it in its own goroutine to keep the caller's pacing, and
// wait for it to return before closing any of the channels.
func InvokeConcurrently(n int, url string, ch chan<- []string, ch_latency_spinning chan<- int64, ch_latency_sleeping chan<- int64, spinning bool) {
	var inflight sync.WaitGroup
	for i := 0; i < n; i++ {
		inflight.Add(1)
		go func() {
			defer inflight.Done()

			startInvoke, endInvoke, latency, err := Invoke(url)
			if err != nil {
				fmt.Printf("Error invoking benchmark: %v\n", err)
				return
			}

			start := strconv.FormatInt(startInvoke, 10)
			end := strconv.FormatInt(endInvoke, 10)
			elapsed := strconv.FormatInt(latency, 10)
			if spinning {
				ch_latency_spinning <- latency
				ch <- []string{start, end, elapsed, "-"}
				return
			}
			ch_latency_sleeping <- latency
			ch <- []string{start, end, "-", elapsed}
		}()
	}
	inflight.Wait()
}

// WriteToCSV writes every record received on ch to writer until ch is closed,
// then calls wg.Done. Write errors are logged and the record is skipped. The
// writer is not flushed here; that is left to the caller.
func WriteToCSV(writer *csv.Writer, ch <-chan []string, wg *sync.WaitGroup) {
	defer wg.Done()
	for record := range ch {
		if err := writer.Write(record); err != nil {
			fmt.Printf("Error writing to CSV file: %v\n", err)
		}
	}
}

// GetDataAtPercentile returns the element of data at the given percentile
// (0-100), using the truncated rank int(percentile/100 * (len(data)-1)) in
// ascending order. Percentiles outside 0-100 are clamped to the smallest or
// largest element. It returns 0 for empty data.
//
// data itself is left untouched: the function sorts a private copy.
func GetDataAtPercentile(data []int64, percentile float64) int64 {
	if len(data) == 0 {
		return 0
	}

	sorted := append([]int64(nil), data...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	index := int((percentile / 100) * float64(len(sorted)-1))
	if index < 0 {
		index = 0
	} else if index >= len(sorted) {
		index = len(sorted) - 1
	}
	return sorted[index]
}
nil { + fmt.Printf(fmt.Sprintf("Error setting up power profile: %+v", err)) + } + + for j := 0; j < 1000; j++ { + startInvoke, endInvoke, latency, err := util.Invoke(util.SpinningURL) + if err != nil { + fmt.Printf("Error invoking benchmark: %v\n", err) + } + err = writer.Write(append([]string{strconv.FormatInt(startInvoke, 10), strconv.FormatInt(endInvoke, 10), strconv.FormatInt(latency, 10)})) + if err != nil { + fmt.Printf("Error writing metrics to the CSV file: %v\n", err) + } + } + + err = writer.Write(append([]string{"-", "-", "-"})) + if err != nil { + fmt.Printf("Error writing metrics to the CSV file: %v\n", err) + } + fmt.Println("done") + } +} \ No newline at end of file