From 174fb8ad5c7221b8c0acbf75018c794e9938a5c8 Mon Sep 17 00:00:00 2001
From: "Jonathan A. Sternberg"
Date: Mon, 13 Jan 2025 10:26:54 -0600
Subject: [PATCH] cache: convert prune loop from recursive invocation to
 iterative

The prune logic would prune multiple times because one prune pass can
free records that make further records eligible for pruning. This was
done through a recursive invocation. Since Go does not perform
tail-call optimization, each pass added a new stack frame. This change
unrolls the logic so the prune function is invoked iteratively rather
than recursively.

`prune` and `pruneOnce` have also had their names swapped. `pruneOnce`
implies that it runs a single prune pass while `prune` reads as the
top-level function. The previous code had this reversed: `pruneOnce`
called `prune`, and `prune` called itself recursively.

I've also updated a section in the controller that invokes prune on
each worker. In older versions of Go, the extra closure was necessary
because the loop variable was reused across iterations, which could
cause the goroutines to all reference the same worker instead of
different workers. Go 1.22 gave loop variables per-iteration scope, so
the wrapper is no longer needed.

Signed-off-by: Jonathan A. Sternberg
---
 cache/manager.go               | 41 +++++++++++++++++++++-------------
 control/control.go             | 14 +++++-------
 hack/composefiles/compose.yaml |  3 ---
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/cache/manager.go b/cache/manager.go
index c1b3d52ec1d6..5b037326271d 100644
--- a/cache/manager.go
+++ b/cache/manager.go
@@ -1012,7 +1012,7 @@ func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opt
 	cm.muPrune.Lock()
 
 	for _, opt := range opts {
-		if err := cm.pruneOnce(ctx, ch, opt); err != nil {
+		if err := cm.prune(ctx, ch, opt); err != nil {
 			cm.muPrune.Unlock()
 			return err
 		}
@@ -1029,7 +1029,7 @@ func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opt
 	return nil
 }
 
-func (cm *cacheManager) pruneOnce(ctx context.Context, ch chan client.UsageInfo, opt client.PruneInfo) error {
+func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt client.PruneInfo) error {
 	filter, err := filters.ParseAll(opt.Filter...)
 	if err != nil {
 		return errors.Wrapf(err, "failed to parse prune filters %v", opt.Filter)
@@ -1066,14 +1066,21 @@ func (cm *cacheManager) pruneOnce(ctx context.Context, ch chan client.UsageInfo,
 		}
 	}
 
-	return cm.prune(ctx, ch, pruneOpt{
+	popt := pruneOpt{
 		filter:       filter,
 		all:          opt.All,
 		checkShared:  check,
 		keepDuration: opt.KeepDuration,
 		keepBytes:    calculateKeepBytes(totalSize, dstat, opt),
 		totalSize:    totalSize,
-	})
+	}
+	for {
+		releasedSize, releasedCount, err := cm.pruneOnce(ctx, ch, popt)
+		if err != nil || releasedCount == 0 {
+			return err
+		}
+		popt.totalSize -= releasedSize
+	}
 }
 
 func calculateKeepBytes(totalSize int64, dstat disk.DiskStat, opt client.PruneInfo) int64 {
@@ -1100,9 +1107,9 @@ func calculateKeepBytes(totalSize int64, dstat disk.DiskStat, opt client.PruneIn
 	return keepBytes
 }
 
-func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt pruneOpt) (err error) {
+func (cm *cacheManager) pruneOnce(ctx context.Context, ch chan client.UsageInfo, opt pruneOpt) (releasedSize, releasedCount int64, err error) {
 	if opt.keepBytes != 0 && opt.totalSize < opt.keepBytes {
-		return nil
+		return 0, 0, nil
 	}
 
 	var toDelete []*deleteRecord
@@ -1206,11 +1213,11 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
 				// mark metadata as deleted in case we crash before cleanup finished
 				if err := cr.queueDeleted(); err != nil {
 					releaseLocks()
-					return err
+					return 0, 0, err
 				}
 				if err := cr.commitMetadata(); err != nil {
 					releaseLocks()
-					return err
+					return 0, 0, err
 				}
 			}
 			cr.mu.Unlock()
@@ -1221,7 +1228,7 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
 	cm.mu.Unlock()
 
 	if len(toDelete) == 0 {
-		return nil
+		return 0, 0, nil
 	}
 
 	// calculate sizes here so that lock does not need to be held for slow process
@@ -1234,7 +1241,7 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
 		if size == sizeUnknown {
 			// calling size will warm cache for next call
 			if _, err := cr.size(ctx); err != nil {
-				return err
+				return 0, 0, err
 			}
 		}
 	}
@@ -1277,15 +1284,18 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
 			c.Size = cr.equalImmutable.getSize() // benefit from DiskUsage calc
 		}
 
-		opt.totalSize -= c.Size
+		releasedSize += c.Size
 
 		if cr.equalImmutable != nil {
 			if err1 := cr.equalImmutable.remove(ctx, false); err == nil {
 				err = err1
 			}
 		}
-		if err1 := cr.remove(ctx, true); err == nil {
+
+		if err1 := cr.remove(ctx, true); err1 != nil && err == nil {
 			err = err1
+		} else if err1 == nil {
+			releasedCount++
 		}
 
 		if err == nil && ch != nil {
@@ -1294,16 +1304,17 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
 		cr.mu.Unlock()
 	}
 	cm.mu.Unlock()
+
 	if err != nil {
-		return err
+		return releasedSize, releasedCount, err
 	}
 
 	select {
 	case <-ctx.Done():
-		return context.Cause(ctx)
+		err = context.Cause(ctx)
 	default:
-		return cm.prune(ctx, ch, opt)
 	}
+	return releasedSize, releasedCount, err
 }
 
 func (cm *cacheManager) markShared(m map[string]*cacheUsageInfo) error {

diff --git a/control/control.go b/control/control.go
index 9f1c4ce6708f..3fb19576cf81 100644
--- a/control/control.go
+++ b/control/control.go
@@ -623,14 +623,12 @@ func (c *Controller) gc() {
 	}()
 
 	for _, w := range workers {
-		func(w worker.Worker) {
-			eg.Go(func() error {
-				if policy := w.GCPolicy(); len(policy) > 0 {
-					return w.Prune(ctx, ch, policy...)
-				}
-				return nil
-			})
-		}(w)
+		eg.Go(func() error {
+			if policy := w.GCPolicy(); len(policy) > 0 {
+				return w.Prune(ctx, ch, policy...)
+			}
+			return nil
+		})
 	}
 
 	err = eg.Wait()

diff --git a/hack/composefiles/compose.yaml b/hack/composefiles/compose.yaml
index 0536f6725f81..8e68c9f218b5 100644
--- a/hack/composefiles/compose.yaml
+++ b/hack/composefiles/compose.yaml
@@ -4,8 +4,6 @@ services:
     container_name: buildkit-dev
     build:
       context: ../..
-      args:
-        BUILDKIT_DEBUG: 1
     image: moby/buildkit:local
     ports:
       - 127.0.0.1:5000:5000
@@ -13,7 +11,6 @@ services:
     restart: always
     privileged: true
     environment:
-      DELVE_PORT: 5000
       OTEL_SERVICE_NAME: buildkitd
       OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
     configs:
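
Below is a minimal, self-contained sketch of the recursion-to-loop pattern
this commit applies. It is not buildkit's actual code: the record type, the
reference counting, and the sizes are invented for illustration. It shows
the shape of the new control flow, where the top-level prune repeats
pruneOnce until a pass releases nothing, instead of pruneOnce effectively
tail-calling itself.

package main

import "fmt"

// record is a hypothetical cache record. A record can only be pruned once
// nothing references it, so pruning one record can make another prunable;
// this is why a single pass is not enough.
type record struct {
	size     int64
	refCount int     // references still held by other records
	parent   *record // the record this one references
	deleted  bool
}

// pruneOnce performs one pass, deleting every currently-unreferenced record,
// and reports how much it released. Like the patched pruneOnce, it does not
// loop or recurse itself.
func pruneOnce(records []*record) (releasedSize, releasedCount int64) {
	for _, r := range records {
		if r.deleted || r.refCount > 0 {
			continue
		}
		r.deleted = true
		releasedSize += r.size
		releasedCount++
		if r.parent != nil {
			// The parent may become prunable on the next pass.
			r.parent.refCount--
		}
	}
	return releasedSize, releasedCount
}

// prune is the top-level entry point. Instead of one stack frame per pass
// (Go does not eliminate tail calls), it repeats pruneOnce until a pass
// makes no progress, mirroring the loop introduced in cache/manager.go.
func prune(records []*record, totalSize int64) int64 {
	for {
		releasedSize, releasedCount := pruneOnce(records)
		if releasedCount == 0 {
			return totalSize
		}
		totalSize -= releasedSize
	}
}

func main() {
	// A reference chain a -> b -> c, visited in the order c, b, a so that
	// each pass frees exactly one record and three passes are needed.
	c := &record{size: 10, refCount: 1}
	b := &record{size: 20, refCount: 1, parent: c}
	a := &record{size: 30, parent: b}
	fmt.Println(prune([]*record{c, b, a}, 60)) // prints 0
}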
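
The control.go change relies on Go 1.22's per-iteration loop variables. The
sketch below, with hypothetical string worker names standing in for
worker.Worker and a plain WaitGroup standing in for the errgroup, contrasts
the closure-wrapper idiom the old code used with the direct capture that is
now safe.

package main

import (
	"fmt"
	"sync"
)

func main() {
	workers := []string{"worker-a", "worker-b", "worker-c"}

	var wg sync.WaitGroup
	for _, w := range workers {
		wg.Add(1)
		// Before Go 1.22, w was a single variable reused by every iteration,
		// so this goroutine could observe whichever worker the loop had
		// reached by the time it ran; the old control.go guarded against
		// that by wrapping the body in func(w worker.Worker) { ... }(w).
		// Since Go 1.22, each iteration declares a fresh w, so capturing it
		// directly is safe, which is what the patch now does.
		go func() {
			defer wg.Done()
			fmt.Println("pruning", w)
		}()
	}
	wg.Wait()
}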