Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Internal] Diagnostics: Adds Merge API that combines several CosmosTraceDiagnostics Instances #4175

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 130 additions & 10 deletions Microsoft.Azure.Cosmos/src/Diagnostics/CosmosTraceDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ namespace Microsoft.Azure.Cosmos.Diagnostics
internal sealed class CosmosTraceDiagnostics : CosmosDiagnostics
{
private readonly Lazy<ServerSideCumulativeMetrics> accumulatedMetrics;
private readonly List<CosmosTraceDiagnostics> traceDiagnostics;
private readonly bool isMergedDiagnostics;

public CosmosTraceDiagnostics(ITrace trace)
{
Expand All @@ -34,33 +36,117 @@ public CosmosTraceDiagnostics(ITrace trace)

this.Value = rootTrace;
this.accumulatedMetrics = new Lazy<ServerSideCumulativeMetrics>(() => PopulateServerSideCumulativeMetrics(this.Value));
this.isMergedDiagnostics = false;
}

public CosmosTraceDiagnostics(List<CosmosTraceDiagnostics> traceDiagnostics)
{
this.traceDiagnostics = traceDiagnostics;
this.isMergedDiagnostics = true;

TraceSummary traceSummary = new TraceSummary();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the summary for every merge?

ex:
ReadItem -> Inside RequestInvoker, will it include a summary?
Queries -> These might involve multiple network interactions; will every interaction have its own summary?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary of offline sync-up: for the initial version, a simple implementation with a hint/context to disambiguate whether or not it is hedging is good enough.

traceSummary.SetRegionsContacted(this.GetContactedRegions());
traceSummary.SetFailedRequestCount(this.GetFailedRequestCount());

this.Value = new MergedTrace(
new List<ITrace>(this.traceDiagnostics.Select(trace => trace.Value)),
this.GetStartTimeUtc().Value,
this.GetClientElapsedTime(),
traceSummary);
}

public ITrace Value { get; }

public override string ToString()
{
return this.ToJsonString();
if (!this.isMergedDiagnostics)
{
return this.ToJsonString();
}

return this.MultiDiagnosticsToJsonString();
}

public override TimeSpan GetClientElapsedTime()
{
return this.Value.Duration;
if (!this.isMergedDiagnostics)
{
return this.Value.Duration;
}

TimeSpan maxElpasedTime = TimeSpan.Zero;
foreach (CosmosTraceDiagnostics trace in this.traceDiagnostics)
{
maxElpasedTime += trace.GetClientElapsedTime();
NaluTripician marked this conversation as resolved.
Show resolved Hide resolved
}

return maxElpasedTime;
}

public override IReadOnlyList<(string regionName, Uri uri)> GetContactedRegions()
{
return this.Value?.Summary?.RegionsContacted;
if (!this.isMergedDiagnostics)
{
return this.Value?.Summary?.RegionsContacted;
}

HashSet<(string regionName, Uri uri)> contactedRegions = null;
foreach (CosmosTraceDiagnostics trace in this.traceDiagnostics)
{
if (contactedRegions == null)
{
contactedRegions = new HashSet<(string regionName, Uri uri)>(trace.GetContactedRegions());
}
else
{
foreach ((string regionName, Uri uri) in trace.GetContactedRegions())
{
contactedRegions.Add((regionName, uri));
}

}
}

return contactedRegions.ToList().AsReadOnly();
}

public override ServerSideCumulativeMetrics GetQueryMetrics()
{
return this.accumulatedMetrics.Value;
if (!this.isMergedDiagnostics)
{
return this.accumulatedMetrics.Value;
}

ServerSideMetricsInternalAccumulator accumulator = new ServerSideMetricsInternalAccumulator();
foreach (CosmosTraceDiagnostics traceDiagnostics in this.traceDiagnostics)
{
ServerSideMetricsInternalAccumulator.WalkTraceTreeForQueryMetrics(traceDiagnostics.GetTrace(), accumulator);
}

IReadOnlyList<ServerSidePartitionedMetricsInternal> serverSideMetricsList = accumulator.GetPartitionedServerSideMetrics().Select(metrics => new ServerSidePartitionedMetricsInternal(metrics)).ToList();

ServerSideCumulativeMetrics accumulatedMetrics = new ServerSideCumulativeMetricsInternal(serverSideMetricsList);
return accumulatedMetrics.PartitionedMetrics.Count != 0 ? accumulatedMetrics : null;
}

/// <summary>
/// Exposes the trace held by this diagnostics instance.
/// </summary>
/// <returns>The same <see cref="ITrace"/> instance that the <c>Value</c> property returns.</returns>
internal ITrace GetTrace() => this.Value;

internal bool IsGoneExceptionHit()
{
return this.WalkTraceTreeForGoneException(this.Value);
if (!this.isMergedDiagnostics)
{
return this.WalkTraceTreeForGoneException(this.Value);
}

bool isGoneExceptionHit = false;
foreach (CosmosTraceDiagnostics trace in this.traceDiagnostics)
{
isGoneExceptionHit |= trace.IsGoneExceptionHit();
}
return isGoneExceptionHit;
}

private bool WalkTraceTreeForGoneException(ITrace currentTrace)
Expand Down Expand Up @@ -121,12 +207,23 @@ private static ServerSideCumulativeMetrics PopulateServerSideCumulativeMetrics(I

public override DateTime? GetStartTimeUtc()
{
if (this.Value == null || this.Value.StartTime == null)
if (!this.isMergedDiagnostics)
{
if (this.Value == null || this.Value.StartTime == null)
{
return null;
}

return this.Value.StartTime;
}

DateTime minStartTime = DateTime.MaxValue;
foreach (CosmosTraceDiagnostics trace in this.traceDiagnostics)
{
return null;
minStartTime = minStartTime < trace.GetStartTimeUtc().Value ? minStartTime : trace.GetStartTimeUtc().Value;
}

return this.Value.StartTime;
return minStartTime == DateTime.MaxValue ? null : minStartTime;
}

public override int GetFailedRequestCount()
Expand All @@ -136,7 +233,30 @@ public override int GetFailedRequestCount()
return 0;
}

return this.Value.Summary.GetFailedCount();
}
if (!this.isMergedDiagnostics)
{
return this.Value.Summary.GetFailedCount();
}

int failedRequestCount = 0;
foreach (CosmosTraceDiagnostics trace in this.traceDiagnostics)
{
failedRequestCount += trace.GetFailedRequestCount();
}
return failedRequestCount;
}

/// <summary>
/// Renders the trace held in <c>Value</c> as JSON text.
/// </summary>
/// <returns>The UTF-8 decoded output of <c>WriteTracesToJsonWriter</c> in text format.</returns>
private string MultiDiagnosticsToJsonString()
{
    ReadOnlySpan<byte> utf8Json = this.WriteTracesToJsonWriter(JsonSerializationFormat.Text).Span;
    return Encoding.UTF8.GetString(utf8Json);
}

/// <summary>
/// Writes the trace held in <c>Value</c> to a freshly created JSON writer.
/// </summary>
/// <param name="jsonSerializationFormat">Format handed to <c>JsonWriter.Create</c>.</param>
/// <returns>The buffer returned by the writer once the trace has been written.</returns>
private ReadOnlyMemory<byte> WriteTracesToJsonWriter(JsonSerializationFormat jsonSerializationFormat)
{
    IJsonWriter writer = JsonWriter.Create(jsonSerializationFormat);

    TraceWriter.WriteTrace(writer, this.Value);

    ReadOnlyMemory<byte> serializedTrace = writer.GetResult();
    return serializedTrace;
}
}
}
6 changes: 6 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/ITrace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,11 @@ ITrace StartChild(
/// <param name="trace">Existing trace.</param>
void AddChild(ITrace trace);

/// <summary>
/// Removes the client configuration from this trace. Used when several traces are combined into a single multi-trace instance.
/// </summary>
/// <returns><c>true</c> if the client configuration was removed; otherwise, <c>false</c>.</returns>
bool TryRemoveClientConfig();

}
}
143 changes: 143 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/MergedTrace.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// ------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// ------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Tracing
{
using System;
using System.Collections.Generic;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using static Microsoft.Azure.Cosmos.Tracing.TraceData.ClientSideRequestStatisticsTraceDatum;

/// <summary>
/// An <see cref="ITrace"/> that exposes a list of existing traces as its children.
/// On construction it copies the first "Client Configuration" datum it finds into its own data,
/// asks every supplied trace to remove its own client configuration, and records the summed
/// request charge of all supplied traces under "totalRequestCharge".
/// </summary>
internal sealed class MergedTrace : ITrace
{
    private const string ClientConfigurationKey = "Client Configuration";
    private const string TotalRequestChargeKey = "totalRequestCharge";
    private const string ClientSideRequestStatsKey = "Client Side Request Stats";

    private static readonly IReadOnlyDictionary<string, object> EmptyDictionary = new Dictionary<string, object>();
    private readonly List<ITrace> children;
    private readonly Lazy<Dictionary<string, object>> data;

    /// <summary>
    /// Creates a merged trace over <paramref name="traces"/>.
    /// </summary>
    /// <param name="traces">Traces that become the children of this instance. The list is stored by reference, not copied.</param>
    /// <param name="startTime">Value reported by <see cref="StartTime"/>.</param>
    /// <param name="elapsedTime">Value reported by <see cref="Duration"/>.</param>
    /// <param name="summary">Value reported by <see cref="Summary"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="traces"/> or <paramref name="summary"/> is null.</exception>
    public MergedTrace(
        List<ITrace> traces,
        DateTime startTime,
        TimeSpan elapsedTime,
        TraceSummary summary)
    {
        // Fail with a descriptive exception instead of a NullReferenceException in the loop below.
        this.children = traces ?? throw new ArgumentNullException(nameof(traces));
        this.Summary = summary ?? throw new ArgumentNullException(nameof(summary));
        this.Id = Guid.NewGuid();
        this.StartTime = startTime;
        this.Duration = elapsedTime;
        this.data = new Lazy<Dictionary<string, object>>();

        bool hasClientConfiguration = false;
        double totalRequestCharge = 0;
        foreach (ITrace trace in traces)
        {
            // Only the first client configuration encountered is kept on the merged trace.
            if (!hasClientConfiguration
                && trace.Data.TryGetValue(ClientConfigurationKey, out object clientConfiguration))
            {
                this.data.Value.Add(ClientConfigurationKey, clientConfiguration);
                hasClientConfiguration = true;
            }

            // Every source trace is asked to drop its own copy, whether or not it supplied the kept one.
            trace.TryRemoveClientConfig();

            // The running total is written on each iteration so the key is inserted
            // at the same point as before; with an empty list no data is ever created.
            totalRequestCharge += MergedTrace.GetTraceRequestCharge(trace);
            this.data.Value[TotalRequestChargeKey] = totalRequestCharge;
        }
    }

    /// <summary>
    /// Sums the request charge of every store response recorded beneath <paramref name="trace"/>.
    /// </summary>
    /// <param name="trace">Trace whose children are inspected; data on <paramref name="trace"/> itself is not read.</param>
    /// <returns>The accumulated request charge, or 0 when no client side request statistics are found.</returns>
    private static double GetTraceRequestCharge(ITrace trace)
    {
        double requestCharge = 0;
        foreach (ITrace child in trace.Children)
        {
            // Pattern match rather than cast: a datum of an unexpected type under this key
            // no longer throws InvalidCastException, the search continues below that child.
            if (child.Data.TryGetValue(ClientSideRequestStatsKey, out object datum)
                && datum is ClientSideRequestStatisticsTraceDatum clientSideRequestStats)
            {
                foreach (StoreResponseStatistics storeResponseStatistics in clientSideRequestStats.StoreResponseStatisticsList)
                {
                    requestCharge += storeResponseStatistics.StoreResult.RequestCharge;
                }
            }
            else
            {
                // Children that carry the statistics are not descended into; all others are.
                requestCharge += MergedTrace.GetTraceRequestCharge(child);
            }
        }

        return requestCharge;
    }

    /// <summary>Fixed prefix followed by this instance's <see cref="Id"/>.</summary>
    public string Name => "Multi-request Trace Instance: " + this.Id.ToString();

    /// <summary>Identifier generated when this instance was constructed.</summary>
    public Guid Id { get; }

    /// <summary>Start time supplied to the constructor.</summary>
    public DateTime StartTime { get; }

    /// <summary>Elapsed time supplied to the constructor.</summary>
    public TimeSpan Duration { get; }

    /// <summary>Always the default <see cref="TraceLevel"/> value.</summary>
    public TraceLevel Level => default;

    /// <summary>Always the default <see cref="TraceComponent"/> value.</summary>
    public TraceComponent Component => default;

    /// <summary>Summary supplied to the constructor.</summary>
    public TraceSummary Summary { get; }

    /// <summary>Always null.</summary>
    public ITrace Parent => null;

    /// <summary>The traces supplied to the constructor plus any added through <see cref="AddChild"/>.</summary>
    public IReadOnlyList<ITrace> Children => this.children;

    /// <summary>
    /// The data recorded on this trace, or a shared empty dictionary when nothing has been recorded yet.
    /// </summary>
    public IReadOnlyDictionary<string, object> Data => this.data.IsValueCreated ? this.data.Value : MergedTrace.EmptyDictionary;

    /// <summary>
    /// Appends <paramref name="child"/> to the children list while holding a lock on that list.
    /// </summary>
    /// <param name="child">Trace to append.</param>
    public void AddChild(ITrace child)
    {
        lock (this.children)
        {
            this.children.Add(child);
        }
    }

    /// <summary>
    /// Adds a trace datum under <paramref name="key"/> and passes it to the summary's region tracking.
    /// </summary>
    /// <param name="key">Key for the new entry; adding a key that already exists throws.</param>
    /// <param name="traceDatum">Datum to store.</param>
    public void AddDatum(string key, TraceDatum traceDatum)
    {
        this.data.Value.Add(key, traceDatum);
        this.Summary.UpdateRegionContacted(traceDatum);
    }

    /// <summary>
    /// Adds an arbitrary value under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Key for the new entry; adding a key that already exists throws.</param>
    /// <param name="value">Value to store.</param>
    public void AddDatum(string key, object value)
    {
        this.data.Value.Add(key, value);
    }

    /// <summary>
    /// Stores <paramref name="value"/> under <paramref name="key"/>, replacing any existing entry.
    /// </summary>
    /// <param name="key">Key to add or overwrite.</param>
    /// <param name="value">Value to store.</param>
    public void AddOrUpdateDatum(string key, object value)
    {
        this.data.Value[key] = value;
    }

    /// <summary>Does nothing.</summary>
    public void Dispose()
    {
        // No Op
    }

    /// <summary>
    /// Forwards to the three-argument overload using this trace's component and <see cref="TraceLevel.Info"/>.
    /// </summary>
    /// <param name="name">Passed through to the three-argument overload.</param>
    /// <returns>This instance.</returns>
    public ITrace StartChild(
        string name)
    {
        return this.StartChild(
            name,
            component: this.Component,
            level: TraceLevel.Info);
    }

    /// <summary>
    /// Does not create a child; all arguments are ignored.
    /// </summary>
    /// <param name="name">Ignored.</param>
    /// <param name="component">Ignored.</param>
    /// <param name="level">Ignored.</param>
    /// <returns>This instance.</returns>
    public ITrace StartChild(
        string name,
        TraceComponent component,
        TraceLevel level)
    {
        return this;
    }

    /// <summary>
    /// Removes the "Client Configuration" entry from this trace's data.
    /// </summary>
    /// <returns>true if an entry was removed; false if there was none.</returns>
    public bool TryRemoveClientConfig()
    {
        // Avoid allocating the backing dictionary just to discover there is nothing to remove.
        return this.data.IsValueCreated && this.data.Value.Remove(ClientConfigurationKey);
    }
}
}
5 changes: 5 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/NoOpTrace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,10 @@ public void UpdateRegionContacted(TraceDatum traceDatum)
{
// NoOp
}

// NoOp: nothing is removed, so the result is always false.
public bool TryRemoveClientConfig() => false;
}
}
5 changes: 5 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/Trace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,5 +137,10 @@ public void AddOrUpdateDatum(string key, object value)
{
this.data.Value[key] = value;
}

/// <summary>
/// Removes the "Client Configuration" entry from this trace's data.
/// </summary>
/// <returns>The result of the underlying remove call.</returns>
public bool TryRemoveClientConfig() => this.data.Value.Remove("Client Configuration");
}
}
14 changes: 14 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/TraceSummary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,19 @@ public void AddRegionContacted(string regionName, Uri locationEndpoint)
}
}

/// <summary>
/// Replaces the set of contacted regions with the entries of <paramref name="regionsContacted"/>.
/// </summary>
/// <param name="regionsContacted">Region name / endpoint pairs that become the new contents of the set.</param>
/// <exception cref="ArgumentNullException"><paramref name="regionsContacted"/> is null.</exception>
internal void SetRegionsContacted(IReadOnlyList<(string, Uri)> regionsContacted)
{
    // Validate before touching state: previously a null argument was only detected
    // by UnionWith, after Clear() had already discarded the existing regions.
    if (regionsContacted == null)
    {
        throw new ArgumentNullException(nameof(regionsContacted));
    }

    lock (this.regionContactedInternal)
    {
        this.regionContactedInternal.Clear();
        this.regionContactedInternal.UnionWith(regionsContacted);
    }
}

/// <summary>
/// Overwrites the failed request count stored on this summary with the supplied value.
/// </summary>
/// <param name="failedRequestCount">Value to store as the failed request count.</param>
internal void SetFailedRequestCount(int failedRequestCount)
{
this.failedRequestCount = failedRequestCount;
}

}
}
Loading
Loading