From 23efe58be72d8bcfc6bf4ead60da233b5426f59b Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Sun, 29 Dec 2024 22:00:50 -0500 Subject: [PATCH 1/4] Reimplement the ArgSplitter in Rust. --- .../pants/engine/internals/native_engine.pyi | 8 + src/python/pants/option/arg_splitter.py | 2 +- src/rust/engine/options/src/arg_splitter.rs | 94 ++++++ .../engine/options/src/arg_splitter_tests.rs | 293 ++++++++++++++++++ src/rust/engine/options/src/lib.rs | 4 + src/rust/engine/src/externs/options.rs | 47 +++ 6 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 src/rust/engine/options/src/arg_splitter.rs create mode 100644 src/rust/engine/options/src/arg_splitter_tests.rs diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index 0b9dd899108..c61209ea50f 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -662,9 +662,17 @@ class PyOptionId: self, *components: str, scope: str | None = None, switch: str | None = None ) -> None: ... +class PySplitArgs: + pass + +class PyArgSplitter: + def __init__(self, build_root: str, known_goals: list[str]) -> None: ... + class PyConfigSource: def __init__(self, path: str, content: bytes) -> None: ... + def split_args(self, args: list[str]) -> PySplitArgs: ... + # See src/rust/engine/src/externs/options.rs for the Rust-side versions of these types. T = TypeVar("T") diff --git a/src/python/pants/option/arg_splitter.py b/src/python/pants/option/arg_splitter.py index 5d9b51b397e..ced90f9dd62 100644 --- a/src/python/pants/option/arg_splitter.py +++ b/src/python/pants/option/arg_splitter.py @@ -109,7 +109,7 @@ def _get_known_goal_scopes( yield alias, si def split_args(self, args: Sequence[str]) -> SplitArgs: - """Split the specified arg list (or sys.argv if unspecified). + """Split the specified arg list. args[0] is ignored. diff --git a/src/rust/engine/options/src/arg_splitter.rs b/src/rust/engine/options/src/arg_splitter.rs new file mode 100644 index 00000000000..27a33bfc3a6 --- /dev/null +++ b/src/rust/engine/options/src/arg_splitter.rs @@ -0,0 +1,94 @@ +// Copyright 2025 Pants project contributors (see CONTRIBUTORS.md). +// Licensed under the Apache License, Version 2.0 (see LICENSE). + +use lazy_static::lazy_static; +use regex::Regex; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +lazy_static! { + static ref SPEC_RE: Regex = Regex::new(r"[/\\.:*#]").unwrap(); + static ref SINGLE_DASH_FLAGS: HashSet<&'static str> = + HashSet::from(["-ltrace", "-ldebug", "-linfo", "-lwarn", "-lerror", "-h", "-v", "-V"]); +} + +#[derive(Debug, Eq, PartialEq)] +pub struct SplitArgs { + pub goals: Vec, // The requested known goals. + pub unknown_goals: Vec, // Any requested but unknown goals. + pub specs: Vec, // What to run against, e.g. targets or files/dirs. + pub passthru: Vec, // Any remaining args specified after a -- separator. +} + +pub struct ArgSplitter { + build_root: PathBuf, + known_goal_names: HashSet, +} + +impl ArgSplitter { + pub fn new<'a, I: IntoIterator>( + build_root: &Path, + known_goal_names: I, + ) -> ArgSplitter { + ArgSplitter { + build_root: build_root.to_owned(), + known_goal_names: HashSet::from_iter(known_goal_names.into_iter().map(str::to_string)), + } + } + + pub fn split_args(&self, args: Vec) -> SplitArgs { + let mut goals = vec![]; + let mut unknown_goals: Vec = vec![]; + let mut specs = vec![]; + let mut passthru = vec![]; + + let mut unconsumed_args = args; + unconsumed_args.reverse(); + // The first arg is the binary name, so skip it. + unconsumed_args.pop(); + + // Scan the args looking for goals and specs. + // The one hard case is a single word like `foo` with no path- or target-like qualities + // (e.g., a slash or a colon). It could be a goal, or it could be a top-level directory. + // We disambiguate thus: If it is a known goal name, assume the user intended a goal. + // Otherwise, if it looks like a path or target, or exists on the filesystem, assume + // the user intended a spec, otherwise it's an unknown goal. + // TODO: This is probably not good logic, since the CLI behavior will change based on + // changes to plugins (as they can introduce new goals) or changes on the filesystem. + // We might want to deprecate this behavior and consistently assume that these are goals, + // since the user can always add a `./` prefix to override. + while let Some(arg) = unconsumed_args.pop() { + // Some special flags, such as `-v` and `--help`, are implemented as + // goal aliases, so we must check this before checking for any dash prefixes. + if self.known_goal_names.contains(&arg) { + goals.push(arg); + } else if arg == "--" { + // Arg is the passthru delimiter. + for item in unconsumed_args.drain(..) { + passthru.push(item); + } + passthru.reverse(); + } else if !(arg.starts_with("--") || SINGLE_DASH_FLAGS.contains(arg.as_str())) { + // This is not a flag, so it must be an unknown goal or a spec (or a negative spec, + // which starts with a single dash, and we know is not a single dash flag). + if arg.starts_with("-") + || SPEC_RE.is_match(&arg) + || self.build_root.join(Path::new(&arg)).exists() + { + // Arg is a spec. + specs.push(arg); + } else { + // Arg is an unknown goal. + unknown_goals.push(arg); + } + } + } + + SplitArgs { + goals, + unknown_goals, + specs, + passthru, + } + } +} diff --git a/src/rust/engine/options/src/arg_splitter_tests.rs b/src/rust/engine/options/src/arg_splitter_tests.rs new file mode 100644 index 00000000000..641bfd48318 --- /dev/null +++ b/src/rust/engine/options/src/arg_splitter_tests.rs @@ -0,0 +1,293 @@ +// Copyright 2025 Pants project contributors (see CONTRIBUTORS.md). +// Licensed under the Apache License, Version 2.0 (see LICENSE). + +use crate::arg_splitter::{ArgSplitter, SplitArgs}; +use shlex; +use std::fs::File; +use std::path::Path; +use tempfile::TempDir; + +fn _sv(v: &[&str]) -> Vec { + v.iter().map(|s| String::from(*s)).collect() +} + +fn shlex_and_split_args(build_root: Option<&Path>, args_str: &str) -> SplitArgs { + ArgSplitter::new( + &build_root.unwrap_or(TempDir::new().unwrap().path()), + vec![ + "run", + "check", + "fmt", + "test", + "help", + "jvm", + "bsp", + "-v", + "-h", + "--help", + "--help-advanced", + ], + ) + .split_args(shlex::split(args_str).unwrap()) +} + +#[test] +fn test_spec_detection() { + fn assert_spec(build_root: Option<&Path>, maybe_spec: &str) { + assert_eq!( + SplitArgs { + goals: vec![], + unknown_goals: vec![], + specs: _sv(&[maybe_spec]), + passthru: vec![] + }, + shlex_and_split_args(build_root, &format!("pants {}", maybe_spec)) + ); + } + + fn assert_goal(build_root: Option<&Path>, spec: &str) { + assert_eq!( + SplitArgs { + goals: vec![], + unknown_goals: _sv(&[spec]), + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(build_root, &format!("pants {}", spec)) + ); + } + + let unambiguous_specs = [ + "a/b/c", + "a/b/c/", + "a/b:c", + "a/b/c.txt", + ":c", + "::", + "a/", + "./a.txt", + ".", + "*", + "a/b/*.txt", + "a/b/test*", + "a/**/*", + "a/b.txt:tgt", + "a/b.txt:../tgt", + "dir#gen", + "//:tgt#gen", + "cache.java", + "cache.tmp.java", + ]; + + let directories_vs_goals = ["foo", "a_b_c"]; + let temp_dir = TempDir::new().unwrap(); + + for spec in unambiguous_specs { + assert_spec(Some(temp_dir.path()), spec); + assert_spec(Some(temp_dir.path()), &format!("-{}", spec)); + } + for spec in directories_vs_goals { + assert_goal(Some(temp_dir.path()), spec); + File::create(temp_dir.path().join(Path::new(spec))).unwrap(); + assert_spec(Some(temp_dir.path()), spec); + assert_spec(Some(temp_dir.path()), &format!("-{}", spec)); + } +} + +#[test] +fn test_valid_arg_splits() { + fn assert(goals: &[&str], specs: &[&str], args_str: &str) { + assert_eq!( + SplitArgs { + goals: _sv(goals), + unknown_goals: vec![], + specs: _sv(specs), + passthru: vec![], + }, + shlex_and_split_args(None, args_str) + ) + } + + // Basic arg splitting, various flag combos. + + assert( + &["check", "test"], + &[ + "src/java/org/pantsbuild/foo", + "src/java/org/pantsbuild/bar:baz", + ], + "pants --check-long-flag --gg -ltrace check --cc test --ii \ + src/java/org/pantsbuild/foo src/java/org/pantsbuild/bar:baz", + ); + assert( + &["check", "test"], + &[ + "src/java/org/pantsbuild/foo", + "src/java/org/pantsbuild/bar:baz", + ], + "pants --fff=arg check --gg-gg=arg-arg test --iii --check-long-flag \ + src/java/org/pantsbuild/foo src/java/org/pantsbuild/bar:baz -ltrace --another-global", + ); + + // Distinguish goals from specs. + + assert(&["check", "test"], &["foo::"], "pants check test foo::"); + assert(&["check"], &["test:test"], "pants check test:test"); + assert(&["test"], &["test:test"], "pants test test:test"); + + assert(&["test"], &["./test"], "pants test ./test"); + assert(&["test"], &["//test"], "pants test //test"); + assert(&["test"], &["./test.txt"], "pants test ./test.txt"); + assert(&["test"], &["test/test.txt"], "pants test test/test.txt"); + assert(&["test"], &["test/test"], "pants test test/test"); + + assert(&["test"], &["."], "pants test ."); + assert(&["test"], &["*"], "pants test *"); + assert(&["test"], &["test/*.txt"], "pants test test/*.txt"); + assert(&["test"], &["test/**/*"], "pants test test/**/*"); + assert(&["test"], &["-"], "pants test -"); + assert(&["test"], &["-a/b"], "pants test -a/b"); + assert(&["test"], &["check.java"], "pants test check.java"); +} + +#[test] +fn test_passthru_args() { + assert_eq!( + SplitArgs { + goals: _sv(&["test"]), + unknown_goals: vec![], + specs: _sv(&["foo/bar"]), + passthru: _sv(&["-t", "this is the arg"]), + }, + shlex_and_split_args(None, "pants test foo/bar -- -t 'this is the arg'") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["check", "test"]), + unknown_goals: vec![], + specs: _sv(&[ + "src/java/org/pantsbuild/foo", + "src/java/org/pantsbuild/bar:baz" + ]), + passthru: _sv(&["passthru1", "passthru2", "-linfo"]), + }, + shlex_and_split_args( + None, + "pants -lerror --fff=arg check --gg-gg=arg-arg test --iii \ + --check-long-flag src/java/org/pantsbuild/foo \ + src/java/org/pantsbuild/bar:baz -- passthru1 passthru2 -linfo" + ) + ); +} + +#[test] +fn test_split_args_simple() { + assert_eq!( + SplitArgs { + goals: vec![], + unknown_goals: vec![], + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(None, "pants") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["help"]), + unknown_goals: vec![], + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(None, "pants help") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["fmt", "check"]), + unknown_goals: vec![], + specs: _sv(&["::"]), + passthru: vec![] + }, + shlex_and_split_args(None, "pants fmt check ::") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["fmt", "check"]), + unknown_goals: vec![], + specs: _sv(&["path/to/dir", "file.py", ":target",]), + passthru: vec![] + }, + shlex_and_split_args( + None, + "pants -ldebug --global-flag1 --global-flag2=val fmt \ + --scoped-flag1 check --scoped-flag2 path/to/dir file.py :target" + ) + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["run"]), + unknown_goals: vec![], + specs: _sv(&["path/to:bin"]), + passthru: vec![] + }, + shlex_and_split_args(None, "pants --global-flag1 run path/to:bin") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["-h"]), + unknown_goals: vec![], + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(None, "pants -h") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["test", "--help"]), + unknown_goals: vec![], + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(None, "pants test --help") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["test", "--help"]), + unknown_goals: vec![], + specs: vec![], + passthru: vec![] + }, + shlex_and_split_args(None, "pants test --help") + ); +} + +#[test] +fn test_split_args_short_flags() { + assert_eq!( + SplitArgs { + goals: _sv(&["run"]), + unknown_goals: vec![], + specs: _sv(&["path/to:bin"]), + passthru: vec![] + }, + shlex_and_split_args(None, "pants -lwarn run path/to:bin") + ); + + assert_eq!( + SplitArgs { + goals: _sv(&["run"]), + unknown_goals: vec![], + // An unknown short flag reads as a negative spec. + specs: _sv(&["-x", "path/to:bin"]), + passthru: vec![] + }, + shlex_and_split_args(None, "pants -x run path/to:bin") + ); +} diff --git a/src/rust/engine/options/src/lib.rs b/src/rust/engine/options/src/lib.rs index 6fc885d110d..b5cedb95daf 100644 --- a/src/rust/engine/options/src/lib.rs +++ b/src/rust/engine/options/src/lib.rs @@ -1,6 +1,10 @@ // Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). +pub mod arg_splitter; +#[cfg(test)] +mod arg_splitter_tests; + mod args; #[cfg(test)] mod args_tests; diff --git a/src/rust/engine/src/externs/options.rs b/src/rust/engine/src/externs/options.rs index ebce8c601e9..36cb6b42296 100644 --- a/src/rust/engine/src/externs/options.rs +++ b/src/rust/engine/src/externs/options.rs @@ -5,6 +5,7 @@ use pyo3::exceptions::PyException; use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString, PyTuple}; use pyo3::{prelude::*, BoundObject}; +use options::arg_splitter::{ArgSplitter, SplitArgs}; use options::{ apply_dict_edits, apply_list_edits, Args, ConfigSource, DictEdit, DictEditAction, Env, ListEdit, ListEditAction, ListOptionValue, OptionId, OptionParser, OptionalOptionValue, Scope, @@ -13,11 +14,13 @@ use options::{ use itertools::Itertools; use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; pyo3::import_exception!(pants.option.errors, ParseError); pub(crate) fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; Ok(()) @@ -146,6 +149,50 @@ impl PyOptionId { } } +#[pyclass] +struct PySplitArgs(SplitArgs); + +#[pymethods] +impl PySplitArgs { + fn goals(&self) -> &Vec { + &self.0.goals + } + + fn unknown_goals(&self) -> &Vec { + &self.0.unknown_goals + } + + fn specs(&self) -> &Vec { + &self.0.specs + } + + fn passthru(&self) -> &Vec { + &self.0.passthru + } +} + +#[pyclass] +struct PyArgSplitter(ArgSplitter); + +#[pymethods] +impl PyArgSplitter { + #[new] + fn __new__(build_root: &str, known_goal_names: Vec) -> PyResult { + Ok(Self(ArgSplitter::new( + &PathBuf::from(build_root), + known_goal_names + .iter() + .map(AsRef::as_ref) + .collect::>(), + ))) + } + + #[pyo3(signature = (args))] + fn split_args(&self, args: Vec) -> PyResult { + Ok(PySplitArgs(self.0.split_args(args))) + } +} + #[pyclass] struct PyConfigSource(ConfigSource); From 06a47956f8821ee55ae848d05e7d02900dbe31d3 Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Wed, 8 Jan 2025 19:58:36 -0500 Subject: [PATCH 2/4] lint --- src/python/pants/engine/internals/native_engine.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index c61209ea50f..437a1aced2f 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -670,7 +670,6 @@ class PyArgSplitter: class PyConfigSource: def __init__(self, path: str, content: bytes) -> None: ... - def split_args(self, args: list[str]) -> PySplitArgs: ... # See src/rust/engine/src/externs/options.rs for the Rust-side versions of these types. From 48bfaaa2c338d6398f3237a3d8fa7a7bfd357764 Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Fri, 10 Jan 2025 07:36:03 -0800 Subject: [PATCH 3/4] Code review iteration --- src/rust/engine/options/src/arg_splitter.rs | 2 +- src/rust/engine/src/externs/options.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/rust/engine/options/src/arg_splitter.rs b/src/rust/engine/options/src/arg_splitter.rs index 27a33bfc3a6..acb9f57d086 100644 --- a/src/rust/engine/options/src/arg_splitter.rs +++ b/src/rust/engine/options/src/arg_splitter.rs @@ -32,7 +32,7 @@ impl ArgSplitter { ) -> ArgSplitter { ArgSplitter { build_root: build_root.to_owned(), - known_goal_names: HashSet::from_iter(known_goal_names.into_iter().map(str::to_string)), + known_goal_names: known_goal_names.into_iter().map(str::to_string).collect(), } } diff --git a/src/rust/engine/src/externs/options.rs b/src/rust/engine/src/externs/options.rs index 36cb6b42296..dd026bebde6 100644 --- a/src/rust/engine/src/externs/options.rs +++ b/src/rust/engine/src/externs/options.rs @@ -14,12 +14,13 @@ use options::{ use itertools::Itertools; use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; +use std::path::Path; pyo3::import_exception!(pants.option.errors, ParseError); pub(crate) fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -177,19 +178,18 @@ struct PyArgSplitter(ArgSplitter); #[pymethods] impl PyArgSplitter { #[new] - fn __new__(build_root: &str, known_goal_names: Vec) -> PyResult { - Ok(Self(ArgSplitter::new( - &PathBuf::from(build_root), + fn __new__(build_root: &str, known_goal_names: Vec) -> Self { + Self(ArgSplitter::new( + &Path::new(build_root), known_goal_names .iter() .map(AsRef::as_ref) .collect::>(), - ))) + )) } - #[pyo3(signature = (args))] - fn split_args(&self, args: Vec) -> PyResult { - Ok(PySplitArgs(self.0.split_args(args))) + fn split_args(&self, args: Vec) -> PySplitArgs { + PySplitArgs(self.0.split_args(args)) } } From 0a18ce1eeb373f7752105e43cbc871ba4fb82ed2 Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Fri, 10 Jan 2025 14:59:46 -0800 Subject: [PATCH 4/4] Whoops --- src/rust/engine/src/externs/options.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/engine/src/externs/options.rs b/src/rust/engine/src/externs/options.rs index dd026bebde6..a00a9f215fb 100644 --- a/src/rust/engine/src/externs/options.rs +++ b/src/rust/engine/src/externs/options.rs @@ -180,7 +180,7 @@ impl PyArgSplitter { #[new] fn __new__(build_root: &str, known_goal_names: Vec) -> Self { Self(ArgSplitter::new( - &Path::new(build_root), + Path::new(build_root), known_goal_names .iter() .map(AsRef::as_ref)