From 0761d4b63d1b013d28564e52d92d7ed698eccc88 Mon Sep 17 00:00:00 2001
From: Wei Fu
Date: Tue, 23 Apr 2024 09:12:50 +0000
Subject: [PATCH] *: init node100_dp5_pod10k for runkperf bench

Signed-off-by: Wei Fu
---
 .../commands/bench/node100_dp5_pod10k.go      | 108 ++++++++++++++++++
 contrib/cmd/runkperf/commands/bench/root.go   |   1 +
 .../loadprofile/node100_dp5_pod10k.yaml       |  40 +++++++
 3 files changed, 149 insertions(+)
 create mode 100644 contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go
 create mode 100644 contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml

diff --git a/contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go b/contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go
new file mode 100644
index 0000000..60b8e50
--- /dev/null
+++ b/contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go
@@ -0,0 +1,108 @@
+package bench
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	internaltypes "github.com/Azure/kperf/contrib/internal/types"
+	"github.com/Azure/kperf/contrib/internal/utils"
+
+	"github.com/urfave/cli"
+)
+
+var benchNode100Deployment5Pod10KCase = cli.Command{
+	Name: "node100_dp5_pod10k",
+	Usage: `
+
+The test suite sets up 100 virtual nodes and deploys 5 deployments with 10k
+pods in total on those nodes. It repeatedly rolling-updates the deployments
+one by one during the benchmark.
+	`,
+	Flags: []cli.Flag{
+		cli.IntFlag{
+			Name:  "total",
+			Usage: "Total requests per runner (there are 10 runners in total and each runner's rate is 10)",
+			Value: 36000,
+		},
+		cli.IntFlag{
+			Name:  "podsize",
+			Usage: "Pad each pod's annotations to increase the pod object's size. The resulting pod size is close to this value (in bytes)",
+			Value: 0,
+		},
+	},
+	Action: func(cliCtx *cli.Context) error {
+		_, err := renderBenchmarkReportInterceptor(
+			addAPIServerCoresInfoInterceptor(benchNode100Deployment5Pod10KRun),
+		)(cliCtx)
+		return err
+	},
+}
+
+// benchNode100Deployment5Pod10KRun is the run function for subcommand benchNode100Deployment5Pod10KCase.
+func benchNode100Deployment5Pod10KRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) {
+	ctx := context.Background()
+	kubeCfgPath := cliCtx.GlobalString("kubeconfig")
+
+	rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx,
+		"loadprofile/node100_dp5_pod10k.yaml")
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rgCfgFileDone() }()
+
+	vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node100dp5pod10k", 100, 150)
+	if err != nil {
+		return nil, fmt.Errorf("failed to deploy virtual node: %w", err)
+	}
+	defer func() { _ = vcDone() }()
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	restartInterval := 10 * time.Second
+	dpCtx, dpCancel := context.WithCancel(ctx)
+
+	podSize := cliCtx.Int("podsize")
+	rollingUpdateFn, err := utils.RepeatRollingUpdate10KPod(dpCtx, kubeCfgPath, "dp5pod10k", podSize, restartInterval)
+	if err != nil {
+		dpCancel()
+		return nil, fmt.Errorf("failed to setup workload: %w", err)
+	}
+
+	go func() {
+		defer wg.Done()
+
+		// FIXME(weifu):
+		//
+		// DeployRunnerGroup should return ready notification.
+		// The rolling update should run after runners.
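+		//
+		// Until then, the rolling updates run concurrently with the
+		// runner group: dpCancel() below cancels dpCtx once
+		// DeployRunnerGroup returns, and wg.Wait() then blocks until
+		// this goroutine has exited.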
+		rollingUpdateFn()
+	}()
+
+	rgResult, derr := utils.DeployRunnerGroup(ctx,
+		kubeCfgPath,
+		cliCtx.GlobalString("runner-image"),
+		rgCfgFile,
+		cliCtx.GlobalString("runner-flowcontrol"),
+		cliCtx.GlobalString("rg-affinity"),
+	)
+	dpCancel()
+	wg.Wait()
+
+	if derr != nil {
+		return nil, derr
+	}
+
+	return &internaltypes.BenchmarkReport{
+		Description: fmt.Sprintf(`
+Environment: 100 virtual nodes managed by kwok-controller,
+Workload: Deploy 5 deployments with 10,000 pods in total. Deployments are rolling-updated one by one at an interval of %v`, restartInterval),
+		LoadSpec: *rgSpec,
+		Result:   *rgResult,
+		Info: map[string]interface{}{
+			"podSizeInBytes": podSize,
+		},
+	}, nil
+}
diff --git a/contrib/cmd/runkperf/commands/bench/root.go b/contrib/cmd/runkperf/commands/bench/root.go
index 0a7193e..945d7e0 100644
--- a/contrib/cmd/runkperf/commands/bench/root.go
+++ b/contrib/cmd/runkperf/commands/bench/root.go
@@ -53,5 +53,6 @@ var Command = cli.Command{
 	},
 	Subcommands: []cli.Command{
 		benchNode100Job1Pod3KCase,
+		benchNode100Deployment5Pod10KCase,
 	},
 }
diff --git a/contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml b/contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml
new file mode 100644
index 0000000..4b75cd5
--- /dev/null
+++ b/contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml
@@ -0,0 +1,40 @@
+count: 10
+loadProfile:
+  version: 1
+  description: "node100-deployment5-pod10k"
+  spec:
+    rate: 10
+    total: 36000
+    conns: 10
+    client: 100
+    contentType: json
+    disableHTTP2: false
+    maxRetries: 0
+    requests:
+      - staleList:
+          version: v1
+          resource: pods
+          # NOTE: Please align with ../../utils/utils.go#RepeatRollingUpdate10KPod
+          seletor: "app=benchmark"
+          # NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
+          # There are only 100 nodes and each node can run up to 150 pods, so
+          # the response should contain items.
+          fieldSelector: "spec.nodeName=node100dp5pod10k-49"
+        shares: 1000 # 1000 / (1000 + 100 + 200) * 10 = 7.7 req/s
+      - staleList:
+          version: v1
+          resource: pods
+        shares: 100 # 100 / (1000 + 100 + 200) * 10 = 0.8 req/s
+      - quorumList:
+          version: v1
+          resource: pods
+          namespace: benchmark-0
+          # NOTE: This simulates the list-pods requests issued by node-local
+          # daemons, including the kubelet, when they read pods from etcd.
+          # The limit is 100 because it's close to the MaxPods value.
+          limit: 100
+          # NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
+          # There are only 100 nodes and each node can run up to 150 pods, so
+          # the response should contain items.
+          fieldSelector: "spec.nodeName=node100dp5pod10k-49"
+        shares: 200 # 200 / (1000 + 100 + 200) * 10 = 1.5 req/s
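+# NOTE: Expected scale of the generated load, assuming each runner enforces
+# its own rate/total as the --total flag's help text describes:
+#   per runner: 36000 requests at 10 req/s ~= 3600s (about one hour)
+#   all 10 runners combined: 360000 requests at ~100 req/s aggregate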