*: init node100_dp5_pod10k for runkperf bench #117

Merged
9 commits merged on Apr 23, 2024
2 changes: 2 additions & 0 deletions api/types/load_traffic.go
@@ -104,6 +104,8 @@ type RequestList struct {
Limit int `json:"limit" yaml:"limit"`
// Selector defines how to identify a set of objects.
Selector string `json:"seletor" yaml:"seletor"`
// FieldSelector defines how to identify a set of objects with field selector.
FieldSelector string `json:"fieldSelector" yaml:"fieldSelector"`
}

// RequestPut defines PUT request for target resource type.
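To make the new field concrete, here is a minimal sketch (not part of this PR) of how a request with fieldSelector round-trips through YAML. The RequestList struct is copied from the hunk above for illustration; the package-main wrapper and sample values are assumptions, not kperf code.

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

// RequestList mirrors api/types/load_traffic.go, shown here only for the sketch.
type RequestList struct {
	Limit         int    `json:"limit" yaml:"limit"`
	Selector      string `json:"seletor" yaml:"seletor"`
	FieldSelector string `json:"fieldSelector" yaml:"fieldSelector"`
}

func main() {
	doc := `
limit: 100
seletor: app=benchmark
fieldSelector: spec.nodeName=node100dp5pod10k-49
`
	var req RequestList
	if err := yaml.Unmarshal([]byte(doc), &req); err != nil {
		panic(err)
	}
	// Prints: {Limit:100 Selector:app=benchmark FieldSelector:spec.nodeName=node100dp5pod10k-49}
	fmt.Printf("%+v\n", req)
}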
2 changes: 2 additions & 0 deletions api/types/load_traffic_test.go
@@ -39,6 +39,7 @@ spec:
resource: pods
namespace: default
seletor: app=x2
fieldSelector: spec.nodeName=x
shares: 200
- quorumList:
group: core
@@ -94,6 +95,7 @@ spec:
assert.Equal(t, "default", target.Spec.Requests[2].StaleList.Namespace)
assert.Equal(t, 0, target.Spec.Requests[2].StaleList.Limit)
assert.Equal(t, "app=x2", target.Spec.Requests[2].StaleList.Selector)
assert.Equal(t, "spec.nodeName=x", target.Spec.Requests[2].StaleList.FieldSelector)

assert.NotNil(t, target.Spec.Requests[3].QuorumList)
assert.Equal(t, 400, target.Spec.Requests[3].Shares)
108 changes: 108 additions & 0 deletions contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go
@@ -0,0 +1,108 @@
package bench

import (
"context"
"fmt"
"sync"
"time"

internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
)

var benchNode100Deployment5Pod10KCase = cli.Command{
Name: "node100_dp5_pod10k",
Usage: `

The test suite sets up 100 virtual nodes and deploys 5 deployments with 10k
pods in total on those nodes. It repeatedly rolling-updates the deployments one
by one during the benchmark.
`,
Flags: []cli.Flag{
cli.IntFlag{
Name: "total",
Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 10)",
Value: 36000,
},
cli.IntFlag{
Name: "podsize",
Usage: "Add <key=data, value=randomStringByLen(podsize)> in pod's annotation to increase pod size. The value is close to pod's size",
Value: 0,
},
},
Action: func(cliCtx *cli.Context) error {
_, err := renderBenchmarkReportInterceptor(
addAPIServerCoresInfoInterceptor(benchNode100Deployment5Pod10KRun),
)(cliCtx)
return err
},
}

// benchNode100Deployment5Pod10KRun is the entry point for subcommand benchNode100Deployment5Pod10KCase.
func benchNode100Deployment5Pod10KRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) {
ctx := context.Background()
kubeCfgPath := cliCtx.GlobalString("kubeconfig")

rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx,
"loadprofile/node100_dp5_pod10k.yaml")
if err != nil {
return nil, err
}
defer func() { _ = rgCfgFileDone() }()

vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node100dp5pod10k", 100, 150)
if err != nil {
return nil, fmt.Errorf("failed to deploy virtual node: %w", err)
}
defer func() { _ = vcDone() }()

var wg sync.WaitGroup
wg.Add(1)

restartInterval := 10 * time.Second
dpCtx, dpCancel := context.WithCancel(ctx)

podSize := cliCtx.Int("podsize")
rollingUpdateFn, err := utils.RepeatRollingUpdate10KPod(dpCtx, kubeCfgPath, "dp5pod10k", podSize, restartInterval)
if err != nil {
dpCancel()
return nil, fmt.Errorf("failed to setup workload: %w", err)
}

go func() {
defer wg.Done()

// FIXME(weifu):
//
// DeployRunnerGroup should return a ready notification.
// The rolling update should start only after the runners are ready.
rollingUpdateFn()
}()

rgResult, derr := utils.DeployRunnerGroup(ctx,
cliCtx.GlobalString("kubeconfig"),
cliCtx.GlobalString("runner-image"),
rgCfgFile,
cliCtx.GlobalString("runner-flowcontrol"),
cliCtx.GlobalString("rg-affinity"),
)
dpCancel()
wg.Wait()

if derr != nil {
return nil, derr
}

return &internaltypes.BenchmarkReport{
Description: fmt.Sprintf(`
Environment: 100 virtual nodes managed by kwok-controller,
Workload: Deploy 5 deployments with 10,000 pods in total. Rolling-update the deployments one by one at an interval of %v`, restartInterval),
LoadSpec: *rgSpec,
Result: *rgResult,
Info: map[string]interface{}{
"podSizeInBytes": podSize,
},
}, nil
}
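As a back-of-the-envelope check on the defaults above (the --total flag's note about 10 runners at rate 10, and 36000 requests per runner), the aggregate load and expected run time work out as in this small sketch; it is only illustrative arithmetic, not output from kperf.

package main

import "fmt"

func main() {
	// Assumed defaults: 10 runners, 10 req/s per runner, 36000 requests per runner.
	runners := 10.0
	ratePerRunner := 10.0
	totalPerRunner := 36000.0

	fmt.Printf("aggregate rate: %.0f req/s\n", runners*ratePerRunner)              // 100 req/s
	fmt.Printf("expected duration: %.0f s (~1h)\n", totalPerRunner/ratePerRunner)  // 3600 s
}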
33 changes: 3 additions & 30 deletions contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go
@@ -6,14 +6,10 @@ import (
"sync"
"time"

"github.com/Azure/kperf/api/types"
kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils"
internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
"gopkg.in/yaml.v2"
"k8s.io/klog/v2"
)

var benchNode100Job1Pod3KCase = cli.Command{
@@ -43,31 +39,8 @@ func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.Benchmark
ctx := context.Background()
kubeCfgPath := cliCtx.GlobalString("kubeconfig")

var rgSpec types.RunnerGroupSpec
rgCfgFile, rgCfgFileDone, err := utils.NewLoadProfileFromEmbed(
"loadprofile/node100_job1_pod3k.yaml",
func(spec *types.RunnerGroupSpec) error {
reqs := cliCtx.Int("total")
if reqs < 0 {
return fmt.Errorf("invalid total-requests value: %v", reqs)
}

rgAffinity := cliCtx.GlobalString("rg-affinity")
affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity})
if err != nil {
return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err)
}

spec.Profile.Spec.Total = reqs
spec.NodeAffinity = affinityLabels

data, _ := yaml.Marshal(spec)
klog.V(2).InfoS("Load Profile", "config", string(data))

rgSpec = *spec
return nil
},
)
rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx,
"loadprofile/node100_job1_pod3k.yaml")
if err != nil {
return nil, err
}
@@ -108,7 +81,7 @@ func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.Benchmark
Description: fmt.Sprintf(`
Environment: 100 virtual nodes managed by kwok-controller,
Workload: Deploy 1 job with 3,000 pods repeatedly. The parallelism is 100. The interval is %v`, jobInterval),
LoadSpec: rgSpec,
LoadSpec: *rgSpec,
Result: *rgResult,
Info: make(map[string]interface{}),
}, nil
1 change: 1 addition & 0 deletions contrib/cmd/runkperf/commands/bench/root.go
@@ -53,5 +53,6 @@ var Command = cli.Command{
},
Subcommands: []cli.Command{
benchNode100Job1Pod3KCase,
benchNode100Deployment5Pod10KCase,
},
}
37 changes: 37 additions & 0 deletions contrib/cmd/runkperf/commands/bench/utils.go
@@ -7,10 +7,13 @@ import (
"os"
"path/filepath"

"github.com/Azure/kperf/api/types"
kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils"
internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
"gopkg.in/yaml.v2"
"k8s.io/klog/v2"
)

@@ -120,3 +123,37 @@ func deployVirtualNodepool(ctx context.Context, cliCtx *cli.Context, target stri
return kr.DeleteNodepool(ctx, 0, target)
}, nil
}

// newLoadProfileFromEmbed loads a load profile from the embedded filesystem
// and tweaks it based on the CLI flags.
func newLoadProfileFromEmbed(cliCtx *cli.Context, name string) (_name string, _spec *types.RunnerGroupSpec, _cleanup func() error, _err error) {
var rgSpec types.RunnerGroupSpec
rgCfgFile, rgCfgFileDone, err := utils.NewLoadProfileFromEmbed(
name,
func(spec *types.RunnerGroupSpec) error {
reqs := cliCtx.Int("total")
if reqs < 0 {
return fmt.Errorf("invalid total-requests value: %v", reqs)
}

rgAffinity := cliCtx.GlobalString("rg-affinity")
affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity})
if err != nil {
return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err)
}

spec.Profile.Spec.Total = reqs
spec.NodeAffinity = affinityLabels

data, _ := yaml.Marshal(spec)
klog.V(2).InfoS("Load Profile", "config", string(data))

rgSpec = *spec
return nil
},
)
if err != nil {
return "", nil, nil, err
}
return rgCfgFile, &rgSpec, rgCfgFileDone, nil
}
12 changes: 12 additions & 0 deletions contrib/internal/manifests/helm.go
@@ -0,0 +1,12 @@
package manifests

import (
rootmainfests "github.com/Azure/kperf/manifests"

"helm.sh/helm/v3/pkg/chart"
)

// LoadChart returns the chart named componentName from this package's embedded filesystem.
func LoadChart(componentName string) (*chart.Chart, error) {
return rootmanifests.LoadChartFromEmbedFS(FS, componentName)
}
40 changes: 40 additions & 0 deletions contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml
@@ -0,0 +1,40 @@
count: 10
loadProfile:
  version: 1
  description: "node100-deployment5-pod10k"
  spec:
    rate: 10
    total: 36000
    conns: 10
    client: 100
    contentType: json
    disableHTTP2: false
    maxRetries: 0
    requests:
      - staleList:
          version: v1
          resource: pods
          # NOTE: Please align with ../../utils/utils.go#RepeatRollingUpdate10KPod
          seletor: "app=benchmark"
          # NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
          # There are only 100 nodes and each node can run up to 150 pods, so the
          # response should contain items.
          fieldSelector: "spec.nodeName=node100dp5pod10k-49"
        shares: 1000 # 1000 / (1000 + 100 + 200) * 10 = 7.7 req/s
      - staleList:
          version: v1
          resource: pods
        shares: 100 # 100 / (1000 + 100 + 200) * 10 = 0.8 req/s
      - quorumList:
          version: v1
          resource: pods
          namespace: benchmark-0
          # NOTE: This simulates the list-pods requests issued by daemonsets and the
          # kubelet when they fetch pods from etcd. The limit is 100 because it is
          # close to the MaxPods value.
          limit: 100
          # NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
          # There are only 100 nodes and each node can run up to 150 pods, so the
          # response should contain items.
          fieldSelector: "spec.nodeName=node100dp5pod10k-49"
        shares: 200 # 200 / (1000 + 100 + 200) * 10 = 1.5 req/s
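The shares comments above imply that the profile's 10 req/s rate is split across the three requests in proportion to their shares. A small sketch of that arithmetic (the request labels are informal, and the proportional-split reading comes from the inline comments rather than kperf documentation):

package main

import "fmt"

func main() {
	rate := 10.0 // loadProfile.spec.rate from the profile above
	requests := []struct {
		name   string
		shares float64
	}{
		{"staleList pods with fieldSelector", 1000},
		{"staleList all pods", 100},
		{"quorumList pods in benchmark-0", 200},
	}

	var totalShares float64
	for _, r := range requests {
		totalShares += r.shares
	}

	for _, r := range requests {
		// Assumed: each request receives rate * shares / sum(shares).
		fmt.Printf("%-35s %.2f req/s\n", r.name, rate*r.shares/totalShares)
	}
	// Prints roughly 7.69, 0.77 and 1.54 req/s, matching the comments above.
}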
@@ -0,0 +1,3 @@
apiVersion: v1
name: "2k-pods-per-1-deployment"
version: "0.0.1"
@@ -0,0 +1,53 @@
{{- $pattern := .Values.pattern }}
{{- $podSizeInBytes := int .Values.podSizeInBytes }}
{{- range $index := (untilStep 0 (int .Values.total) 1) }}
apiVersion: v1
kind: Namespace
metadata:
  name: {{ $pattern }}-{{ $index }}
  labels:
    name: benchmark-testing
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $pattern }}-{{ $index }}
  namespace: {{ $pattern }}-{{ $index }}
  labels:
    app: {{ $pattern }}
spec:
  replicas: 2000
  strategy:
    rollingUpdate:
      maxSurge: 100
    type: RollingUpdate
  selector:
    matchLabels:
      app: {{ $pattern }}
      index: "{{ $index }}"
  template:
    metadata:
      labels:
        app: {{ $pattern }}
        index: "{{ $index }}"
      annotations:
        data: "{{ randAlphaNum $podSizeInBytes | nospace }}"
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: type
                    operator: In
                    values:
                      - kperf-virtualnodes
      tolerations:
        - key: "kperf.io/nodepool"
          operator: "Exists"
          effect: "NoSchedule"
      containers:
        - name: fake-container
          image: fake-image
---
{{- end}}
@@ -0,0 +1,3 @@
pattern: "benchmark"
total: 5
podSizeInBytes: 2048
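For reference, the data annotation rendered by the deployment template pads each pod with podSizeInBytes random alphanumeric characters, which is what makes the stored pod object grow. The helper below only mimics Sprig's randAlphaNum for illustration; it is not kperf code.

package main

import (
	"fmt"
	"math/rand"
)

const alphanum = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

// randAlphaNum is a stand-in for Sprig's randAlphaNum: n random alphanumeric characters.
func randAlphaNum(n int) string {
	buf := make([]byte, n)
	for i := range buf {
		buf[i] = alphanum[rand.Intn(len(alphanum))]
	}
	return string(buf)
}

func main() {
	// podSizeInBytes: 2048 comes from values.yaml above; the annotation adds
	// roughly this many bytes to every pod object.
	annotations := map[string]string{
		"data": randAlphaNum(2048),
	}
	fmt.Println(len(annotations["data"])) // 2048
}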