From 99f44a85b8e618e2b02bbc25c5f818bff4c7792f Mon Sep 17 00:00:00 2001
From: Sean Kim <40130483+theskim@users.noreply.github.com>
Date: Wed, 27 Nov 2024 10:42:24 -0500
Subject: [PATCH] Normalize URL paths: convert /.//p, /..//p, and //p to p
---
url/src/lib.rs | 54 ++++++++++++++++++++++++++++++++-
url/tests/expected_failures.txt | 4 ---
2 files changed, 53 insertions(+), 5 deletions(-)
diff --git a/url/src/lib.rs b/url/src/lib.rs
index 3eacded1..b953d349 100644
--- a/url/src/lib.rs
+++ b/url/src/lib.rs
@@ -1757,6 +1757,39 @@ impl Url {
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
let cannot_be_a_base = self.cannot_be_a_base();
let scheme_type = SchemeType::from(self.scheme());
+ let mut path_empty = false;
+
+ // Find the first ':' and check whether the character right after it is '/'
+ let mut has_host = if let Some(index) = self.serialization.find(":") {
+ if self.serialization.len() > index + 1
+ && self.serialization.as_bytes().get(index + 1) == Some(&b'/')
+ {
+ let rest = &self.serialization[(index + ":/".len())..];
+ let host_part = rest.split('/').next().unwrap_or("");
+ path_empty = rest.is_empty();
+ !host_part.is_empty() && !host_part.contains('@')
+ } else {
+ false
+ }
+ } else {
+ false
+ };
+
+ // Require the new path to be longer than one character, to account
+ // for cases where "/." was already inserted into the serialization.
+ // If the path was set before, the other two conditions were already checked:
+ // https://url.spec.whatwg.org/#url-serializing
+ // 1. The host is null
+ // 2. The first segment of the URL's path is an empty string
+ if path.len() > 1 {
+ if let Some(index) = self.serialization.find(":") {
+ let removal_start = index + ":".len();
+ if self.serialization[removal_start..].starts_with("/.") {
+ self.path_start -= "/.".len() as u32;
+ }
+ }
+ }
+
self.serialization.truncate(self.path_start as usize);
self.mutate(|parser| {
if cannot_be_a_base {
@@ -1766,7 +1799,6 @@ impl Url {
}
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
} else {
- let mut has_host = true; // FIXME
parser.parse_path_start(
scheme_type,
&mut has_host,
@@ -1774,6 +1806,26 @@ impl Url {
);
}
});
+
+ // Handle cases where normalization spans both the serialization and the path:
+ // insert "/." immediately after the ":" that ends the scheme.
+ // This is done only if three conditions are met:
+ // https://url.spec.whatwg.org/#url-serializing
+ // 1. The host is null
+ // 2. The URL's path length is greater than 1
+ // 3. The first segment of the URL's path is an empty string
+ if !has_host && path.len() > 1 && path_empty {
+ if let Some(index) = self.serialization.find(":") {
+ if self.serialization.len() > index + 2
+ && self.serialization.as_bytes().get(index + 1) == Some(&b'/')
+ && self.serialization.as_bytes().get(index + 2) == Some(&b'/')
+ {
+ self.serialization.insert_str(index + ":".len(), "/.");
+ self.path_start += "/.".len() as u32;
+ }
+ }
+ }
+
self.restore_after_path(old_after_path_pos, &after_path);
}
diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt
index 238f7a66..9bf60b34 100644
--- a/url/tests/expected_failures.txt
+++ b/url/tests/expected_failures.txt
@@ -41,7 +41,3 @@
<file://monkey/> set pathname to <\\\\>
<file:///unicorn> set pathname to <//\\/>
<file:///unicorn> set pathname to <//monkey/..//>
- <non-spec:/> set pathname to </.//p>
- <non-spec:/> set pathname to </..//p>
- <non-spec:/> set pathname to <//p>
- <non-spec:/.//> set pathname to <p>