Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Databricks][Cgroups] Extended cgroup support #18

Open
wants to merge 1 commit into
base: databricks-bazel-7.4.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ public enum WorkerProtocolFormat {
public static final String DIFFERENTIATE_WORKSPACE_CACHE =
"internal-differentiate-workspace-cache";

/** Disables cgroups for a spawn. */
public static final String NO_SUPPORTS_CGROUPS = "no-supports-cgroups";

/**
* Indicates that the action is compatible with path mapping, e.g., removing the configuration
* segment from the paths of all inputs and outputs.
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1691,6 +1691,7 @@ java_library(
":config/run_under",
":config/starlark_defined_config_transition",
":platform_options",
":test/test_configuration",
"//src/main/java/com/google/devtools/build/lib/actions:action_environment",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:build_configuration_event",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
import com.google.devtools.build.lib.starlarkbuildapi.BuildConfigurationApi;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.build.lib.util.RegexFilter;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.SkyValue;
Expand Down Expand Up @@ -988,4 +989,16 @@ public static BuildEvent buildEvent(@Nullable BuildConfigurationValue configurat
public ImmutableSet<String> getReservedActionMnemonics() {
return reservedActionMnemonics;
}

/**
 * Returns the per-resource amounts configured in {@code TestOptions.testResources} for tests of
 * the given size.
 *
 * <p>Each {@code testResources} entry pairs a resource name with a per-size table of amounts.
 * When the same resource name appears in more than one entry, the last entry wins instead of
 * failing the collection with an {@link IllegalArgumentException}. Entries whose table has no
 * value for {@code size} are skipped, since the resulting immutable map cannot hold nulls.
 *
 * @param size the test size whose amounts should be looked up
 * @return an immutable map from resource name to amount; empty when this configuration has no
 *     {@code TestOptions}
 */
public Map<String, Double> getTestResources(com.google.devtools.build.lib.packages.TestSize size) {
  if (!buildOptions.contains(com.google.devtools.build.lib.analysis.test.TestConfiguration.TestOptions.class)) {
    return ImmutableMap.of();
  }
  return buildOptions
      .get(com.google.devtools.build.lib.analysis.test.TestConfiguration.TestOptions.class)
      .testResources
      .stream()
      // A missing amount for this size would make the collector throw a NullPointerException.
      .filter(e -> e.getSecond().get(size) != null)
      .collect(
          ImmutableMap.toImmutableMap(
              e -> e.getFirst(),
              e -> e.getSecond().get(size),
              // Repeated resource names: keep the value that was specified last.
              (earlier, later) -> later));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ private static ResourceSet getResourceSetFromSize(TestSize size) {
Map<String, String> executionInfo = Maps.newLinkedHashMap();
executionInfo.putAll(TargetUtils.getExecutionInfo(rule));

Map<String, Double> requestedResources;
try {
requestedResources = parseTags(ruleContext.getLabel(), executionInfo);
} catch (UserExecException e) {
requestedResources = new HashMap<>();
}

Map<String, Double> testResources = ruleContext.getConfiguration().getTestResources(size);
for (Map.Entry<String, Double> request: testResources.entrySet()) {
if (requestedResources.containsKey(request.getKey())) {
continue;
}
executionInfo.put(String.format("resources:%s:%f", request.getKey(), request.getValue()), "");
}

boolean incompatibleExclusiveTestSandboxed = false;

testConfiguration = ruleContext.getFragment(TestConfiguration.class);
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/com/google/devtools/build/lib/sandbox/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ java_library(
deps = [
"//src/main/java/com/google/devtools/build/lib/actions:localhost_capacity",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:cpu_resource_converter",
"//src/main/java/com/google/devtools/build/lib/util:ram_resource_converter",
"//src/main/java/com/google/devtools/build/lib/util:resource_converter",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down Expand Up @@ -212,6 +213,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib:runtime",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:exec_exception",
"//src/main/java/com/google/devtools/build/lib/actions:execution_requirements",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories",
Expand All @@ -223,11 +225,14 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/exec/local",
"//src/main/java/com/google/devtools/build/lib/exec/local:options",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups",
"//src/main/java/com/google/devtools/build/lib/shell",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:pair",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/protobuf:failure_details_proto",
"//src/main/protobuf:failure_details_java_proto",
"//third_party:flogger",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class LinuxSandboxCommandLineBuilder {
private boolean enablePseudoterminal = false;
private String sandboxDebugPath = null;
private boolean sigintSendsSigterm = false;
private String cgroupsDir;
private Set<Path> cgroupsDirs = ImmutableSet.of();

private LinuxSandboxCommandLineBuilder(Path linuxSandboxPath) {
this.linuxSandboxPath = linuxSandboxPath;
Expand Down Expand Up @@ -215,8 +215,8 @@ public LinuxSandboxCommandLineBuilder setSandboxDebugPath(String sandboxDebugPat
* this directory, its parent directory, and the cgroup directory for the Bazel process.
*/
@CanIgnoreReturnValue
public LinuxSandboxCommandLineBuilder setCgroupsDir(String cgroupsDir) {
this.cgroupsDir = cgroupsDir;
public LinuxSandboxCommandLineBuilder setCgroupsDirs(Set<Path> cgroupsDirs) {
this.cgroupsDirs = cgroupsDirs;
return this;
}

Expand Down Expand Up @@ -299,8 +299,8 @@ public ImmutableList<String> buildForCommand(List<String> commandArguments) {
if (persistentProcess) {
commandLineBuilder.add("-p");
}
if (cgroupsDir != null) {
commandLineBuilder.add("-C", cgroupsDir);
for (Path dir: cgroupsDirs) {
commandLineBuilder.add("-C", dir.toString());
}
commandLineBuilder.add("--");
commandLineBuilder.addAll(commandArguments);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import com.google.devtools.build.lib.runtime.CommandEnvironment;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxOutputs;
import com.google.devtools.build.lib.sandbox.cgroups.VirtualCGroup;
import com.google.devtools.build.lib.server.FailureDetails;
import com.google.devtools.build.lib.shell.Command;
import com.google.devtools.build.lib.shell.CommandException;
import com.google.devtools.build.lib.util.OS;
Expand All @@ -59,6 +61,7 @@
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.SortedMap;
import java.util.Set;
import java.util.TreeMap;
Expand All @@ -74,6 +77,8 @@ final class LinuxSandboxedSpawnRunner extends AbstractSandboxSpawnRunner {

private static final AtomicBoolean warnedAboutUnsupportedModificationCheck = new AtomicBoolean();

private java.util.concurrent.ConcurrentHashMap<Integer, Optional<VirtualCGroup>> cgroups;

/**
* Returns whether the linux sandbox is supported on the local machine by running a small command
* in it.
Expand Down Expand Up @@ -166,10 +171,98 @@ private static boolean computeIsSupported(CommandEnvironment cmdEnv, Path linuxS
this.localEnvProvider = new PosixLocalEnvProvider(cmdEnv.getClientEnv());
this.treeDeleter = treeDeleter;
this.reporter = cmdEnv.getReporter();
this.cgroups = new java.util.concurrent.ConcurrentHashMap<>();
this.slashTmp = cmdEnv.getRuntime().getFileSystem().getPath("/tmp");
this.knownPathsToMountUnderHermeticTmp = collectPathsToMountUnderHermeticTmp(cmdEnv);
}

/**
 * Returns the cgroup that the sandbox for {@code spawn} should run in, creating and caching it
 * on first use.
 *
 * <p>Spawns whose execution info contains {@link ExecutionRequirements#NO_SUPPORTS_CGROUPS} never
 * get a cgroup. Otherwise the limits start from the sandbox options ({@code memoryLimitMb} and
 * {@code cpuLimit}); when {@code executionInfoLimit} is set, {@code resources:memory:<n>},
 * {@code resources:cpu:<n>} and {@code cpu:<n>} tags on the spawn override them. A cgroup is only
 * created when at least one resulting limit is positive. The outcome (possibly empty) is stored
 * in {@code cgroups} under the context's ID.
 *
 * @param spawn the spawn whose execution info is inspected
 * @param context the execution context; its ID keys the cache and names the cgroup scope
 * @return the cgroup to use, or an empty {@link Optional} when no limit applies
 * @throws ExecException (as {@link UserExecException}) when a resource tag fails validation
 * @throws IOException declared by this method; presumably raised by the cgroup operations
 */
private Optional<VirtualCGroup> getCgroup(Spawn spawn, SpawnExecutionContext context) throws ExecException, IOException {
  if (spawn.getExecutionInfo().get(ExecutionRequirements.NO_SUPPORTS_CGROUPS) != null) {
    return Optional.empty();
  }

  int spawnId = context.getId();
  Optional<VirtualCGroup> cached = cgroups.get(spawnId);
  if (cached != null) {
    return cached;
  }

  SandboxOptions sandboxOptions = getSandboxOptions();
  long memoryLimit = sandboxOptions.memoryLimitMb * 1024L * 1024L;
  float cpuLimit = sandboxOptions.cpuLimit;

  if (sandboxOptions.executionInfoLimit) {
    for (String tag : spawn.getExecutionInfo().keySet()) {
      // Tracks which requirement was being parsed so a validation failure names the right tag.
      ExecutionRequirements.ParseableRequirement requirement = ExecutionRequirements.RESOURCES;
      String resourceName;
      float amount;
      try {
        String spec = requirement.parseIfMatches(tag);
        if (spec != null) {
          // "resources:<name>:<value>" - split the remainder at the first colon.
          int colon = spec.indexOf(":");
          resourceName = spec.substring(0, colon);
          amount = Float.parseFloat(spec.substring(colon + 1));
        } else {
          requirement = ExecutionRequirements.CPU;
          String cpus = requirement.parseIfMatches(tag);
          if (cpus == null) {
            // Neither a resources tag nor a cpu tag.
            continue;
          }
          resourceName = "cpu";
          amount = Float.parseFloat(cpus);
        }
      } catch (ExecutionRequirements.ParseableRequirement.ValidationException e) {
        String message =
            String.format(
                "%s has a '%s' tag, but its value '%s' didn't pass validation: %s",
                spawn.getTargetLabel(),
                requirement.userFriendlyName(),
                e.getTagValue(),
                e.getMessage());
        FailureDetails.FailureDetail details =
            FailureDetails.FailureDetail.newBuilder()
                .setMessage(message)
                .setSpawn(
                    FailureDetails.Spawn.newBuilder()
                        .setCode(FailureDetails.Spawn.Code.COMMAND_LINE_EXPANSION_FAILURE))
                .build();
        throw new UserExecException(e, details);
      }

      if ("memory".equals(resourceName)) {
        // The tag value is in megabytes; the cgroup limit is set in bytes.
        memoryLimit = Math.round(amount * 1024.0 * 1024.0);
      } else if ("cpu".equals(resourceName)) {
        cpuLimit = amount;
      }
    }
  }

  // We put the sandbox inside a unique subdirectory using the context's ID. This ID is
  // unique per spawn run by this spawn runner.
  String scope = "sandbox_" + spawnId + ".scope";
  VirtualCGroup cgroup = null;
  if (memoryLimit > 0) {
    cgroup = VirtualCGroup.getInstance(this.reporter).child(scope);
    cgroup.memory().setMaxBytes(memoryLimit);
  }
  if (cpuLimit > 0) {
    if (cgroup == null) {
      cgroup = VirtualCGroup.getInstance(this.reporter).child(scope);
    }
    cgroup.cpu().setCpus(cpuLimit);
  }

  // Remember the outcome, even when empty, so later lookups for this spawn reuse it.
  Optional<VirtualCGroup> result = Optional.ofNullable(cgroup);
  cgroups.put(spawnId, result);
  return result;
}

private ImmutableSet<Path> collectPathsToMountUnderHermeticTmp(CommandEnvironment cmdEnv) {
// If any path managed or tracked by Bazel is under /tmp, it needs to be explicitly mounted
// into the sandbox when using hermetic /tmp. We attempt to collect an over-approximation of
Expand Down Expand Up @@ -314,14 +407,12 @@ protected SandboxedSpawn prepareSpawn(Spawn spawn, SpawnExecutionContext context
commandLineBuilder.setSandboxDebugPath(sandboxDebugPath.getPathString());
}

if (sandboxOptions.memoryLimitMb > 0) {
CgroupsInfo cgroupsInfo = CgroupsInfo.getInstance();
// We put the sandbox inside a unique subdirectory using the context's ID. This ID is
// unique per spawn run by this spawn runner.
cgroupsDir =
cgroupsInfo.createMemoryLimitCgroupDir(
"sandbox_" + context.getId(), sandboxOptions.memoryLimitMb);
commandLineBuilder.setCgroupsDir(cgroupsDir);
Optional<VirtualCGroup> cgroup = getCgroup(spawn, context);
if (cgroup.isPresent()) {
commandLineBuilder.setCgroupsDirs(
cgroup.get().paths().stream()
.map(p -> fileSystem.getPath(p.toString()))
.collect(ImmutableSet.toImmutableSet()));
}

if (!timeout.isZero()) {
Expand Down Expand Up @@ -447,6 +538,13 @@ public void verifyPostCondition(
if (getSandboxOptions().useHermetic) {
checkForConcurrentModifications(context);
}
Optional<VirtualCGroup> cgroup = cgroups.remove(context.getId());
if (cgroup != null && cgroup.isPresent()) {
// We cannot leave the cgroups around and delete them only when we delete the sandboxes
// because linux has a hard limit of 65535 memory controllers.
// Ref. https://github.com/torvalds/linux/blob/58d4e450a490d5f02183f6834c12550ba26d3b47/include/linux/memcontrol.h#L69
cgroup.get().delete();
}
}

private void checkForConcurrentModifications(SpawnExecutionContext context)
Expand Down Expand Up @@ -511,9 +609,7 @@ private boolean wasModifiedSinceDigest(FileContentsProxy proxy, Path path) throw

@Override
public void cleanupSandboxBase(Path sandboxBase, TreeDeleter treeDeleter) throws IOException {
if (cgroupsDir != null) {
new File(cgroupsDir).delete();
}
VirtualCGroup.deleteInstance();
// Delete the inaccessible files synchronously, bypassing the treeDeleter. They are only a
// couple of files that can be deleted fast, and ensuring they are gone at the end of every
// build avoids annoying permission denied errors if the user happens to run "rm -rf" on the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.devtools.build.lib.util.CpuResourceConverter;
import com.google.devtools.build.lib.util.OptionsUtils;
import com.google.devtools.build.lib.util.RamResourceConverter;
import com.google.devtools.build.lib.util.ResourceConverter;
Expand Down Expand Up @@ -381,6 +382,29 @@ public ImmutableSet<Path> getInaccessiblePaths(FileSystem fs) {
+ " Requires cgroups v1 or v2 and permissions for the users to the cgroups dir.")
public int memoryLimitMb;

// --experimental_sandbox_cpu_limit: CPU limit applied to each Linux sandbox.
// Defaults to "0"; per the help text, the limit only takes effect when the value is > 0.
@Option(
name = "experimental_sandbox_cpu_limit",
defaultValue = "0",
documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.EXECUTION},
converter = CpuResourceConverter.class,
help =
"If > 0, each Linux sandbox will be limited to the given amount of cpus."
+ " Requires cgroups v1 or v2 and permissions for the users to the cgroups dir.")
public float cpuLimit;

// --experimental_sandbox_execution_info_limit: opt-in (default "false") switch that lets
// resource tags in a spawn's execution info (e.g. cpu:3, resources:memory:10) set the
// sandbox's cgroup limits.
@Option(
name = "experimental_sandbox_execution_info_limit",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.EXECUTION},
help =
"If true, resources declared in the execution info that match a cgroup controller"
+ " will be used to apply the limits. For example a target that declares"
+ " cpu:3 and resources:memory:10, will run with at most 3 cpus and 10"
+ " megabytes of memory.")
public boolean executionInfoLimit;

/** Converter for the number of threads used for asynchronous tree deletion. */
public static final class AsyncTreeDeletesConverter extends ResourceConverter.IntegerConverter {
public AsyncTreeDeletesConverter() {
Expand Down
30 changes: 30 additions & 0 deletions src/main/java/com/google/devtools/build/lib/sandbox/cgroups/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Build definitions for the sandbox cgroups Java package.
load("@rules_java//java:defs.bzl", "java_library")

# Package defaults: targets here are visible to everything under //src unless overridden.
package(
default_applicable_licenses = ["//:license"],
default_visibility = ["//src:__subpackages__"],
)

# Every file under this package (recursive glob), exposed to //src subpackages.
filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//src:__subpackages__"],
)

# Java library compiled from the .java sources in this directory and in its v1/ and v2/
# subdirectories.
java_library(
name = "cgroups",
srcs = glob([
"*.java",
"v1/*.java",
"v2/*.java",
]),
deps = [
"//src/main/java/com/google/devtools/build/lib/actions:exec_exception",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/protobuf:failure_details_java_proto",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
"//third_party:jsr305",
],
)
Loading
Loading