From fbcd67406eed31f1f379c288c2e44c1abcecdf4f Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Wed, 7 Aug 2024 22:15:56 +0000 Subject: [PATCH] add test case with 10 virtual nodes and 100 pods --- .../commands/bench/node100_job1_pod3k.go | 2 +- .../commands/bench/node10_job1_pod100.go | 96 +++++++++++++++++++ contrib/cmd/runkperf/commands/bench/root.go | 1 + .../runkperf/commands/ekswarmup/command.go | 2 +- .../loadprofile/node10_job1_pod100.yaml | 27 ++++++ .../manifests/workload/100pod.job.yaml | 31 ++++++ contrib/internal/utils/utils.go | 6 +- examples/node10_job1_pod100.yaml | 25 +++++ 8 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 contrib/cmd/runkperf/commands/bench/node10_job1_pod100.go create mode 100644 contrib/internal/manifests/loadprofile/node10_job1_pod100.yaml create mode 100644 contrib/internal/manifests/workload/100pod.job.yaml create mode 100644 examples/node10_job1_pod100.yaml diff --git a/contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go b/contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go index 742e88c..7fb73af 100644 --- a/contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go +++ b/contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go @@ -68,7 +68,7 @@ func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.Benchmark go func() { defer wg.Done() - utils.RepeatJobWith3KPod(jobCtx, kubeCfgPath, "job1pod3k", jobInterval) + utils.RepeatJobWithPod(jobCtx, kubeCfgPath, "job1pod3k", "workload/3kpod.job.yaml", jobInterval) }() rgResult, derr := utils.DeployRunnerGroup(ctx, diff --git a/contrib/cmd/runkperf/commands/bench/node10_job1_pod100.go b/contrib/cmd/runkperf/commands/bench/node10_job1_pod100.go new file mode 100644 index 0000000..4702a12 --- /dev/null +++ b/contrib/cmd/runkperf/commands/bench/node10_job1_pod100.go @@ -0,0 +1,96 @@ +package bench + +import ( + "context" + "fmt" + "sync" + "time" + + internaltypes "github.com/Azure/kperf/contrib/internal/types" + 
"github.com/Azure/kperf/contrib/internal/utils" + + "github.com/urfave/cli" +) + +var benchNode10Job1Pod100Case = cli.Command{ + Name: "node10_job1_pod100", + Usage: ` + +The test suite is to setup 10 virtual nodes and deploy one job with 100 pods on +that nodes. It repeats to create and delete job. The load profile is fixed. + `, + Flags: append( + []cli.Flag{ + cli.IntFlag{ + Name: "total", + Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 1)", + Value: 1000, + }, + }, + commonFlags..., + ), + Action: func(cliCtx *cli.Context) error { + _, err := renderBenchmarkReportInterceptor( + addAPIServerCoresInfoInterceptor(benchNode10Job1Pod100CaseRun), + )(cliCtx) + return err + }, +} + +// benchNode10Job1Pod100CaseRun is for benchNode10Job1Pod100Case subcommand. It deploys a 10-node virtual nodepool, keeps repeating the 100-pod job in the background until utils.DeployRunnerGroup returns, and reports the runner group's result. +func benchNode10Job1Pod100CaseRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { + ctx := context.Background() + kubeCfgPath := cliCtx.GlobalString("kubeconfig") + + rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx, + "loadprofile/node10_job1_pod100.yaml") + if err != nil { + return nil, err + } + defer func() { _ = rgCfgFileDone() }() + + vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node10job1pod100", + 10, + cliCtx.Int("cpu"), + cliCtx.Int("memory"), + cliCtx.Int("max-pods"), + ) + if err != nil { + return nil, fmt.Errorf("failed to deploy virtual node: %w", err) + } + defer func() { _ = vcDone() }() + + var wg sync.WaitGroup + wg.Add(1) + + jobInterval := 5 * time.Second + jobCtx, jobCancel := context.WithCancel(ctx) + go func() { + defer wg.Done() + + utils.RepeatJobWithPod(jobCtx, kubeCfgPath, "job1pod100", "workload/100pod.job.yaml", jobInterval) + }() + + rgResult, derr := utils.DeployRunnerGroup(ctx, + cliCtx.GlobalString("kubeconfig"), + cliCtx.GlobalString("runner-image"), + rgCfgFile, + cliCtx.GlobalString("runner-flowcontrol"), + cliCtx.GlobalString("rg-affinity"), + ) + jobCancel() + wg.Wait() + + if derr != nil { + 
return nil, derr + } + + return &internaltypes.BenchmarkReport{ + Description: fmt.Sprintf(` +Environment: 10 virtual nodes managed by kwok-controller, +Workload: Deploy 1 job with 100 pods repeatedly. The parallelism is 10. The interval is %v`, jobInterval), + LoadSpec: *rgSpec, + Result: *rgResult, + Info: make(map[string]interface{}), + }, nil +} diff --git a/contrib/cmd/runkperf/commands/bench/root.go b/contrib/cmd/runkperf/commands/bench/root.go index ecafef5..9f4a64d 100644 --- a/contrib/cmd/runkperf/commands/bench/root.go +++ b/contrib/cmd/runkperf/commands/bench/root.go @@ -52,6 +52,7 @@ var Command = cli.Command{ }, }, Subcommands: []cli.Command{ + benchNode10Job1Pod100Case, benchNode100Job1Pod3KCase, benchNode100DeploymentNPod10KCase, }, diff --git a/contrib/cmd/runkperf/commands/ekswarmup/command.go b/contrib/cmd/runkperf/commands/ekswarmup/command.go index 4382ae1..5968b0b 100644 --- a/contrib/cmd/runkperf/commands/ekswarmup/command.go +++ b/contrib/cmd/runkperf/commands/ekswarmup/command.go @@ -117,7 +117,7 @@ var Command = cli.Command{ go func() { defer wg.Done() - utils.RepeatJobWith3KPod(jobCtx, kubeCfgPath, "warmupjob", 5*time.Second) + utils.RepeatJobWithPod(jobCtx, kubeCfgPath, "warmupjob", "workload/3kpod.job.yaml", 5*time.Second) }() _, derr := utils.DeployRunnerGroup(ctx, diff --git a/contrib/internal/manifests/loadprofile/node10_job1_pod100.yaml b/contrib/internal/manifests/loadprofile/node10_job1_pod100.yaml new file mode 100644 index 0000000..6b0df14 --- /dev/null +++ b/contrib/internal/manifests/loadprofile/node10_job1_pod100.yaml @@ -0,0 +1,27 @@ +count: 1 +loadProfile: + version: 1 + description: "node10-job1-pod100" + spec: + rate: 10 + total: 1000 + conns: 10 + client: 10 + contentType: json + disableHTTP2: false + maxRetries: 0 + requests: + - staleList: + version: v1 + resource: pods + shares: 1000 # chance 1000 / (1000 + 100 + 100) + - quorumList: + version: v1 + resource: pods + limit: 1000 + shares: 100 # chance 100 / (1000 +
100 + 100) + - quorumList: + version: v1 + resource: events + limit: 1000 + shares: 100 # chance 100 / (1000 + 100 + 100) diff --git a/contrib/internal/manifests/workload/100pod.job.yaml b/contrib/internal/manifests/workload/100pod.job.yaml new file mode 100644 index 0000000..8a19105 --- /dev/null +++ b/contrib/internal/manifests/workload/100pod.job.yaml @@ -0,0 +1,31 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: batchjobs +spec: + completions: 100 + parallelism: 10 + template: + metadata: + labels: + app: fake-pod + spec: + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: type + operator: In + values: + - kperf-virtualnodes + # A taint was added to an automatically created Node. + # You can remove the taint from the Node or add this toleration. + tolerations: + - key: "kperf.io/nodepool" + operator: "Exists" + effect: "NoSchedule" + containers: + - name: fake-container + image: fake-image diff --git a/contrib/internal/utils/utils.go b/contrib/internal/utils/utils.go index 2f530fa..d06c9fa 100644 --- a/contrib/internal/utils/utils.go +++ b/contrib/internal/utils/utils.go @@ -44,12 +44,10 @@ var ( EKSRunnerNodepoolInstanceType = "m4.4xlarge" ) -// RepeatJobWith3KPod repeats to deploy 3k pods. -func RepeatJobWith3KPod(ctx context.Context, kubeCfgPath string, namespace string, internal time.Duration) { +// RepeatJobWithPod repeatedly deploys the job from the embedded manifest named by target.
+func RepeatJobWithPod(ctx context.Context, kubeCfgPath string, namespace string, target string, internal time.Duration) { klog.V(0).Info("Repeat to create job with 3k pods") - target := "workload/3kpod.job.yaml" - data, err := manifests.FS.ReadFile(target) if err != nil { panic(fmt.Errorf("unexpected error when read %s from embed memory: %v", diff --git a/examples/node10_job1_pod100.yaml b/examples/node10_job1_pod100.yaml new file mode 100644 index 0000000..4755091 --- /dev/null +++ b/examples/node10_job1_pod100.yaml @@ -0,0 +1,25 @@ + version: 1 + description: "node10-job1-pod100" + spec: + rate: 10 + total: 1000 + conns: 10 + client: 10 + contentType: json + disableHTTP2: false + maxRetries: 0 + requests: + - staleList: + version: v1 + resource: pods + shares: 1000 # chance 1000 / (1000 + 100 + 100) + - quorumList: + version: v1 + resource: pods + limit: 1000 + shares: 100 # chance 100 / (1000 + 100 + 100) + - quorumList: + version: v1 + resource: events + limit: 1000 + shares: 100 # chance 100 / (1000 + 100 + 100) \ No newline at end of file