From 085cb3bf6bd07b124250dd289b7c1e77d8561f0a Mon Sep 17 00:00:00 2001 From: Maximilian Moehl Date: Fri, 8 Nov 2024 14:12:55 +0100 Subject: [PATCH] feat: log local TCP address When troubleshooting issues related to keepalive it is often useful to be able to correlate multiple requests made over the same connection. A first step was to introduce the information on whether the request was made on a re-used connection or not which was done with a7dbf849. This commit adds the local address of the TCP connection which adds the capability to distinguish between multiple, parallel keep-alive connections by looking at the client port of gorouter for the backend connection. --- accesslog/schema/access_log_record.go | 14 ++++++++ accesslog/schema/access_log_record_test.go | 37 ++++++++++++++++++++++ config/config.go | 1 + handlers/access_log.go | 4 +++ handlers/requestinfo.go | 2 ++ proxy/round_tripper/proxy_round_tripper.go | 3 ++ proxy/round_tripper/trace.go | 23 ++++++++++---- 7 files changed, 77 insertions(+), 7 deletions(-) diff --git a/accesslog/schema/access_log_record.go b/accesslog/schema/access_log_record.go index 81286320..d10ba810 100644 --- a/accesslog/schema/access_log_record.go +++ b/accesslog/schema/access_log_record.go @@ -110,6 +110,7 @@ type AccessLogRecord struct { LogAttemptsDetails bool FailedAttempts int RoundTripSuccessful bool + ExtraFields []string record []byte // See the handlers.RequestInfo struct for details on these timings. @@ -124,6 +125,8 @@ type AccessLogRecord struct { TlsHandshakeFinishedAt time.Time AppRequestFinishedAt time.Time FinishedAt time.Time + + LocalAddress string } func (r *AccessLogRecord) formatStartedAt() string { @@ -319,6 +322,17 @@ func (r *AccessLogRecord) makeRecord(performTruncate bool) []byte { b.WriteDashOrFloatValue(r.successfulAttemptTime()) } + // We have to consider the impact of iterating over a list. This technically allows to repeat + // some of the fields but it allows us to iterate over the list only once instead of once per + // field when we perform a [slices.Contains] check. + for _, field := range r.ExtraFields { + switch field { + case "local_address": + b.WriteString(`local_address:`) + b.WriteDashOrStringValue(r.LocalAddress) + } + } + b.AppendSpaces(false) // #nosec G104 - ignore errors from writing the access log as it will only cause more errors to log this error b.WriteString(`x_cf_routererror:`) diff --git a/accesslog/schema/access_log_record_test.go b/accesslog/schema/access_log_record_test.go index 4978585b..34c7d867 100644 --- a/accesslog/schema/access_log_record_test.go +++ b/accesslog/schema/access_log_record_test.go @@ -314,6 +314,43 @@ var _ = Describe("AccessLogRecord", func() { Eventually(r).Should(Say(`x_cf_routererror:"-"`)) }) }) + + Context("when extra_fields is set to [local_address]", func() { + Context("and the local address is empty", func() { + It("makes a record with the local address set to -", func() { + record.ExtraFields = []string{"local_address"} + + r := BufferReader(bytes.NewBufferString(record.LogMessage())) + Eventually(r).Should(Say(`local_address:"-"`)) + }) + }) + Context("and the local address contains an address", func() { + It("makes a record with the local address set to that address", func() { + record.ExtraFields = []string{"local_address"} + record.LocalAddress = "10.0.0.1:34823" + + r := BufferReader(bytes.NewBufferString(record.LogMessage())) + Eventually(r).Should(Say(`local_address:"10.0.0.1:34823"`)) + }) + }) + }) + + Context("when extra_fields is set to [foobarbazz]", func() { + It("ignores it", func() { + record.ExtraFields = []string{"foobarbazz"} + record.LocalAddress = "10.0.0.1:34823" + + r := BufferReader(bytes.NewBufferString(record.LogMessage())) + Consistently(r).ShouldNot(Say("foobarbazz")) + }) + It("does not log local_address", func() { + record.ExtraFields = []string{"foobarbazz"} + record.LocalAddress = "10.0.0.1:34823" + + r := BufferReader(bytes.NewBufferString(record.LogMessage())) + Consistently(r).ShouldNot(Say(`local_address:"10.0.0.1:34823"`)) + }) + }) }) Describe("WriteTo", func() { diff --git a/config/config.go b/config/config.go index f2d33909..b22ba17b 100644 --- a/config/config.go +++ b/config/config.go @@ -186,6 +186,7 @@ type LoggingConfig struct { RedactQueryParams string `yaml:"redact_query_params"` EnableAttemptsDetails bool `yaml:"enable_attempts_details"` Format FormatConfig `yaml:"format"` + ExtraFields []string `yaml:"extra_fields"` // This field is populated by the `Process` function. JobName string `yaml:"-"` diff --git a/handlers/access_log.go b/handlers/access_log.go index 294bd969..7cf69f80 100644 --- a/handlers/access_log.go +++ b/handlers/access_log.go @@ -20,6 +20,7 @@ type accessLog struct { accessLogger accesslog.AccessLogger extraHeadersToLog []string logAttemptsDetails bool + extraFields []string logger *slog.Logger } @@ -46,6 +47,7 @@ func (a *accessLog) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http Request: r, ExtraHeadersToLog: a.extraHeadersToLog, LogAttemptsDetails: a.logAttemptsDetails, + ExtraFields: a.extraFields, } requestBodyCounter := &countingReadCloser{delegate: r.Body} @@ -82,6 +84,8 @@ func (a *accessLog) ServeHTTP(rw http.ResponseWriter, r *http.Request, next http alr.AppRequestFinishedAt = reqInfo.AppRequestFinishedAt alr.FinishedAt = reqInfo.FinishedAt + alr.LocalAddress = reqInfo.LocalAddress + a.accessLogger.Log(*alr) } diff --git a/handlers/requestinfo.go b/handlers/requestinfo.go index b0fd3be3..55bcc0fe 100644 --- a/handlers/requestinfo.go +++ b/handlers/requestinfo.go @@ -71,6 +71,8 @@ type RequestInfo struct { ShouldRouteToInternalRouteService bool FailedAttempts int + LocalAddress string + // RoundTripSuccessful will be set once a request has successfully reached a backend instance. RoundTripSuccessful bool diff --git a/proxy/round_tripper/proxy_round_tripper.go b/proxy/round_tripper/proxy_round_tripper.go index 2aae5bd3..78cc4792 100644 --- a/proxy/round_tripper/proxy_round_tripper.go +++ b/proxy/round_tripper/proxy_round_tripper.go @@ -191,6 +191,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response slog.Float64("dns-lookup-time", trace.DnsTime()), slog.Float64("dial-time", trace.DialTime()), slog.Float64("tls-handshake-time", trace.TlsTime()), + slog.String("local-addr", trace.LocalAddr()), ) if err != nil { @@ -255,6 +256,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response slog.Float64("dns-lookup-time", trace.DnsTime()), slog.Float64("dial-time", trace.DialTime()), slog.Float64("tls-handshake-time", trace.TlsTime()), + slog.String("local-addr", trace.LocalAddr()), ) if err != nil { @@ -347,6 +349,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response reqInfo.DialFinishedAt = trace.DialDone() reqInfo.TlsHandshakeStartedAt = trace.TlsStart() reqInfo.TlsHandshakeFinishedAt = trace.TlsDone() + reqInfo.LocalAddress = trace.LocalAddr() if res != nil && endpoint.PrivateInstanceId != "" && !requestSentToRouteService(request) { setupStickySession( diff --git a/proxy/round_tripper/trace.go b/proxy/round_tripper/trace.go index e358c335..2a9690f0 100644 --- a/proxy/round_tripper/trace.go +++ b/proxy/round_tripper/trace.go @@ -11,8 +11,9 @@ import ( // requestTracer holds trace data of a single request. type requestTracer struct { gotConn atomic.Bool - connInfo atomic.Pointer[httptrace.GotConnInfo] + connReused atomic.Bool wroteHeaders atomic.Bool + localAddr atomic.Pointer[string] // all times are stored as returned by time.Time{}.UnixNano() dnsStart atomic.Int64 @@ -26,8 +27,9 @@ type requestTracer struct { // Reset the trace data. Helpful when performing the same request again. func (t *requestTracer) Reset() { t.gotConn.Store(false) - t.connInfo.Store(nil) + t.connReused.Store(false) t.wroteHeaders.Store(false) + t.localAddr.Store(nil) t.dnsStart.Store(0) t.dnsDone.Store(0) t.dialStart.Store(0) @@ -49,11 +51,15 @@ func (t *requestTracer) WroteHeaders() bool { // ConnReused returns true if the traced request used an idle connection. // it returns false if no idle connection was used or if the information was unavailable. func (t *requestTracer) ConnReused() bool { - info := t.connInfo.Load() - if info != nil { - return info.Reused + return t.connReused.Load() +} + +func (t *requestTracer) LocalAddr() string { + p := t.localAddr.Load() + if p == nil { + return "" } - return false + return *p } func (t *requestTracer) DnsStart() time.Time { @@ -121,7 +127,10 @@ func traceRequest(req *http.Request) (*http.Request, *requestTracer) { r2 := req.WithContext(httptrace.WithClientTrace(req.Context(), &httptrace.ClientTrace{ GotConn: func(info httptrace.GotConnInfo) { t.gotConn.Store(true) - t.connInfo.Store(&info) + t.connReused.Store(info.Reused) + la := info.Conn.LocalAddr().String() + t.localAddr.Store(&la) + // FIXME: due to https://github.com/golang/go/issues/31259 this breaks our acceptance tests and is dangerous // disabled for now even though this will reduce the number of requests we can retry // if !info.Reused {