Skip to content

Commit

Permalink
rfc5321: split out banner_timeout from connect_timeout
Browse files Browse the repository at this point in the history
The motivation for this is:

My test environment is not permitted to reach outbound port 25.
If I run an ad-hoc test without setting up an explicit sink,
I end up with messages that try to reach the public internet.
Since they are blocked at a firewall, each of the MX hosts in
the connection plan is subject to a 60s wait before trying the next
thing.

In addition, this can cause the shutdown to take longer while
we wait for the in-flight delivery attempts to complete.

Making a separate configuration option allows the local administrator
to decide how to split the time waiting for a connection from
the time waiting for the banner.

refs: #196
  • Loading branch information
wez committed Jul 12, 2024
1 parent d2128ec commit b003e49
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 16 deletions.
4 changes: 4 additions & 0 deletions crates/kumo-api-types/src/shaping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ MergedEntry {
enable_dane: false,
client_timeouts: SmtpClientTimeouts {
connect_timeout: 60s,
banner_timeout: 60s,
ehlo_timeout: 300s,
mail_from_timeout: 300s,
rcpt_to_timeout: 300s,
Expand Down Expand Up @@ -865,6 +866,7 @@ MergedEntry {
enable_dane: false,
client_timeouts: SmtpClientTimeouts {
connect_timeout: 60s,
banner_timeout: 60s,
ehlo_timeout: 300s,
mail_from_timeout: 300s,
rcpt_to_timeout: 300s,
Expand Down Expand Up @@ -942,6 +944,7 @@ MergedEntry {
enable_dane: false,
client_timeouts: SmtpClientTimeouts {
connect_timeout: 60s,
banner_timeout: 60s,
ehlo_timeout: 300s,
mail_from_timeout: 300s,
rcpt_to_timeout: 300s,
Expand Down Expand Up @@ -1071,6 +1074,7 @@ MergedEntry {
enable_dane: false,
client_timeouts: SmtpClientTimeouts {
connect_timeout: 60s,
banner_timeout: 60s,
ehlo_timeout: 300s,
mail_from_timeout: 300s,
rcpt_to_timeout: 300s,
Expand Down
14 changes: 6 additions & 8 deletions crates/kumod/src/smtp_dispatcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,11 @@ impl SmtpDispatcher {
let tracer = self.tracer.clone();

async move {
let (stream, source_address) = egress_source
.connect_to(SocketAddr::new(address.addr, port))
.await?;
let (stream, source_address) = tokio::time::timeout(
timeouts.connect_timeout,
egress_source.connect_to(SocketAddr::new(address.addr, port)),
)
.await??;

tracing::debug!(
"connected to {address:?} port {port} via source address {source_address:?}"
Expand All @@ -280,7 +282,7 @@ impl SmtpDispatcher {

// Read banner
let banner = client
.read_response(None, timeouts.connect_timeout)
.read_response(None, timeouts.banner_timeout)
.await
.context("reading banner")?;
if banner.code != 220 {
Expand All @@ -291,11 +293,7 @@ impl SmtpDispatcher {
}
};

let connect_timeout = path_config.client_timeouts.connect_timeout;
let mut client = tokio::select! {
_ = tokio::time::sleep(connect_timeout) => {
anyhow::bail!("exceeded timeout of {connect_timeout:?}");
},
_ = shutdown.shutting_down() => anyhow::bail!("shutting down"),
client = make_connection => { client },
}
Expand Down
12 changes: 12 additions & 0 deletions crates/rfc5321/src/client_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ pub struct SmtpClientTimeouts {
)]
pub connect_timeout: Duration,

#[serde(
default = "SmtpClientTimeouts::default_banner_timeout",
with = "duration_serde"
)]
pub banner_timeout: Duration,

#[serde(
default = "SmtpClientTimeouts::default_ehlo_timeout",
with = "duration_serde"
Expand Down Expand Up @@ -66,6 +72,7 @@ impl Default for SmtpClientTimeouts {
fn default() -> Self {
Self {
connect_timeout: Self::default_connect_timeout(),
banner_timeout: Self::default_banner_timeout(),
ehlo_timeout: Self::default_ehlo_timeout(),
mail_from_timeout: Self::default_mail_from_timeout(),
rcpt_to_timeout: Self::default_rcpt_to_timeout(),
Expand All @@ -83,6 +90,9 @@ impl SmtpClientTimeouts {
fn default_connect_timeout() -> Duration {
Duration::from_secs(60)
}
fn default_banner_timeout() -> Duration {
Duration::from_secs(60)
}
fn default_auth_timeout() -> Duration {
Duration::from_secs(60)
}
Expand Down Expand Up @@ -115,6 +125,7 @@ impl SmtpClientTimeouts {
let short = Duration::from_secs(20);
Self {
connect_timeout: short,
banner_timeout: short,
ehlo_timeout: short,
mail_from_timeout: short,
rcpt_to_timeout: short,
Expand All @@ -130,6 +141,7 @@ impl SmtpClientTimeouts {
/// Compute theoretical maximum lifetime of a single message send
pub fn total_message_send_duration(&self) -> Duration {
self.connect_timeout
+ self.banner_timeout
+ self.ehlo_timeout
+ self.auth_timeout
+ self.mail_from_timeout
Expand Down
17 changes: 10 additions & 7 deletions crates/traffic-gen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,18 @@ impl Opt {
}

async fn make_client(&self) -> anyhow::Result<SmtpClient> {
let stream = TcpStream::connect(&self.target)
.await
.with_context(|| format!("connect to {}", self.target))?;
let mut client =
SmtpClient::with_stream(stream, &self.target, SmtpClientTimeouts::default());
let timeouts = SmtpClientTimeouts::default();

let stream =
tokio::time::timeout(timeouts.connect_timeout, TcpStream::connect(&self.target))
.await
.with_context(|| format!("timed out connecting to {}", self.target))?
.with_context(|| format!("failed to connect to {}", self.target))?;
let mut client = SmtpClient::with_stream(stream, &self.target, timeouts);

// Read banner
let connect_timeout = client.timeouts().connect_timeout;
let banner = client.read_response(None, connect_timeout).await?;
let banner_timeout = client.timeouts().banner_timeout;
let banner = client.read_response(None, banner_timeout).await?;
anyhow::ensure!(banner.code == 220, "unexpected banner: {banner:#?}");

// Say EHLO
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
a multiarch image, supporting both `linux/amd64` and `linux/arm64`
architectures. Simply use `docker pull ghcr.io/kumocorp/kumomta-dev:latest`
to get the appropriate architecture.
* Split out the banner waiting portion of
[connect_timeout](../reference/kumo/make_egress_path.md#connect_timeout) into
a new [banner_timeout](../reference/kumo/make_egress_path.md#banner_timeout)
option to make it easier to manage the system behavior if, for example, the
connection is blocked by a firewall. You can now set the connection timeout
to a smaller value while keeping the banner timeout at a more RFC-compliant,
longer, value. #196
* New [kcli trace-smtp-client](../reference/kcli/trace-smtp-client.md)
diagnostic command for observing outbound SMTP sessions. #87
* New [Extended configuration validation mode](../userguide/configuration/policy_helpers.md#validating-your-configuration). #211
Expand Down
18 changes: 17 additions & 1 deletion docs/reference/kumo/make_egress_path.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,24 @@ The following nine settings control the timeouts waiting for responses to variou
The value is specified as a integer in seconds, or as a string using syntax
like `"2min"` for a two minute duration.

## banner_timeout

{{since('dev')}}

How long to wait between a connection being established and receiving a 220
from a receiving host. The default is `60s`.

In earlier versions of KumoMTA, this was rolled together into the `connect_timeout`.

## connect_timeout
How long to wait between starting an SMTP connection and receiving a 220 from a receiving host. The default is `60s`.

How long to wait between starting an SMTP connection and receiving a 220 from a
receiving host. The default is `60s`.

{{since('dev', inline=True)}}
The `connect_timeout` is now purely focused on the time it takes to
establish a working connection. The time allowed for receiving the
initial 220 banner has been separated out into `banner_timeout`.

## starttls_timeout
How long to wait for a response after issuing a STARTTLS comand. The default is `5s`.
Expand Down
1 change: 1 addition & 0 deletions simple_policy.lua
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ kumo.on(
idle_timeout = '25s',
data_timeout = '20s',
data_dot_timeout = '25s',
connect_timeout = '5s',
connection_limit = 32,
-- max_connection_rate = '1/s',
-- max_ready = 16,
Expand Down

0 comments on commit b003e49

Please sign in to comment.