Skip to content

Commit

Permalink
issue-2774: add metrics to user_stats; always report Time sensor and …
Browse files Browse the repository at this point in the history
…histogram (#2797)

* add metrics to user_stats; always report Time sensor and histogram

* add counters test file

* fix

* fix
  • Loading branch information
WilyTiger authored Jan 7, 2025
1 parent 4f702df commit fbf6b9f
Show file tree
Hide file tree
Showing 5 changed files with 1,984 additions and 210 deletions.
105 changes: 97 additions & 8 deletions cloud/filestore/libs/diagnostics/user_counter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace {

////////////////////////////////////////////////////////////////////////////////

// Read/Write counters
constexpr TStringBuf FILESTORE_READ_OPS = "filestore.read_ops";
constexpr TStringBuf FILESTORE_READ_OPS_BURST = "filestore.read_ops_burst";
constexpr TStringBuf FILESTORE_READ_BYTES = "filestore.read_bytes";
Expand All @@ -23,8 +24,12 @@ constexpr TStringBuf FILESTORE_WRITE_BYTES = "filestore.write_bytes";
constexpr TStringBuf FILESTORE_WRITE_BYTES_BURST = "filestore.write_bytes_burst";
constexpr TStringBuf FILESTORE_WRITE_LATENCY = "filestore.write_latency";
constexpr TStringBuf FILESTORE_WRITE_ERRORS = "filestore.write_errors";
constexpr TStringBuf FILESTORE_INDEX_OPS = "filestore.index_ops";
constexpr TStringBuf FILESTORE_INDEX_ERRORS = "filestore.index_errors";

// Index operation counters
//
// User-metric names for index (non ReadData/WriteData) requests. In the
// per-request registration code of this file each name is published with an
// additional "request" label.

// Backed by the request subgroup's "Count" sensor.
constexpr TStringBuf FILESTORE_INDEX_OPS = "filestore.index_ops";
// Histogram (MS_BUCKETS) built from the request subgroup's "Time" sensor.
constexpr TStringBuf FILESTORE_INDEX_LATENCY = "filestore.index_latency";
// Backed by the request subgroup's "Errors/Fatal" sensor.
constexpr TStringBuf FILESTORE_INDEX_ERRORS = "filestore.index_errors";
// Backed by the request subgroup's "Time" sensor, reported as a plain metric
// rather than a histogram.
constexpr TStringBuf FILESTORE_INDEX_CUMULATIVE_TIME = "filestore.index_cumulative_time";

////////////////////////////////////////////////////////////////////////////////

Expand Down Expand Up @@ -214,6 +219,44 @@ TLabels MakeFilestoreLabels(
{"instance", instanceId}};
}

// Returns the lookup table that translates an internal index request name
// (the key) into the value placed in the "request" label of the per-request
// user metrics (the mapped value).
//
// The table is a function-local static, so it is constructed on the first
// call and the same instance is returned to every caller afterwards.
const THashMap<TString, TString>& GetIndexOpsNames()
{
    using TRequestNameMap = THashMap<TString, TString>;

    // Entry order is kept as-is: callers iterate over this table.
    static const TRequestNameMap requestToMetricName = {
        {"AllocateData", "fallocate"},
        {"CreateHandle", "open"},
        {"CreateNode", "createnode"},
        {"DestroyHandle", "release"},
        {"GetNodeAttr", "getattr"},
        {"GetNodeXAttr", "getxattr"},
        {"ListNodeXAttr", "listxattr"},
        {"ListNodes", "readdir"},
        {"RenameNode", "rename"},
        {"SetNodeAttr", "setattr"},
        {"SetNodeXAttr", "setxattr"},
        {"UnlinkNode", "unlink"},
        {"StatFileStore", "statfs"},
        {"ReadLink", "readlink"},
        {"AccessNode", "access"},
        {"RemoveNodeXAttr", "removexattr"},
        {"ReleaseLock", "releaselock"},
        {"AcquireLock", "acquirelock"},
    };

    return requestToMetricName;
}

// Builds the label set for a per-request filestore user metric: the labels
// produced by MakeFilestoreLabels() for the given cloud/folder/filestore/
// instance, extended with a "request" label set to requestName.
TLabels MakeFilestoreLabelsWithRequestName(
    const TString& cloudId,
    const TString& folderId,
    const TString& filestoreId,
    const TString& instanceId,
    const TString& requestName)
{
    TLabels result = MakeFilestoreLabels(
        cloudId,
        folderId,
        filestoreId,
        instanceId);

    result.Add("request", requestName);

    return result;
}

} // namespace

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -308,12 +351,45 @@ void RegisterFilestore(
request.first.LabelValue != "ReadData" &&
request.first.LabelValue != "WriteData")
{
indexOpsCounters.emplace_back(
src->FindSubgroup("request", request.first.LabelValue),
"Count");
indexErrorCounters.emplace_back(
src->FindSubgroup("request", request.first.LabelValue),
"Errors/Fatal");
const auto indexSubgroup =
src->FindSubgroup("request", request.first.LabelValue);

indexOpsCounters.emplace_back(indexSubgroup, "Count");
indexErrorCounters.emplace_back(indexSubgroup, "Errors/Fatal");

auto metricName =
GetIndexOpsNames().find(request.first.LabelValue);
if (metricName) {
const auto labels = MakeFilestoreLabelsWithRequestName(
cloudId,
folderId,
filestoreId,
instanceId,
metricName->second);
AddUserMetric(
dsc,
labels,
{ { indexSubgroup, "Count" } },
FILESTORE_INDEX_OPS);
AddUserMetric(
dsc,
labels,
{ { indexSubgroup, "Time" } },
FILESTORE_INDEX_CUMULATIVE_TIME
);
AddHistogramUserMetric(
MS_BUCKETS,
dsc,
labels,
{ indexSubgroup, "Time" },
FILESTORE_INDEX_LATENCY);
AddUserMetric(
dsc,
labels,
{ { indexSubgroup, "Errors/Fatal" } },
FILESTORE_INDEX_ERRORS
);
}
}
}

Expand Down Expand Up @@ -355,6 +431,19 @@ void UnregisterFilestore(

dsc.RemoveUserMetric(commonLabels, FILESTORE_INDEX_OPS);
dsc.RemoveUserMetric(commonLabels, FILESTORE_INDEX_ERRORS);

for (const auto& [_, requestName]: GetIndexOpsNames()) {
const auto labels = MakeFilestoreLabelsWithRequestName(
cloudId,
folderId,
filestoreId,
instanceId,
requestName);
dsc.RemoveUserMetric(labels, FILESTORE_INDEX_OPS);
dsc.RemoveUserMetric(labels, FILESTORE_INDEX_CUMULATIVE_TIME);
dsc.RemoveUserMetric(labels, FILESTORE_INDEX_LATENCY);
dsc.RemoveUserMetric(labels, FILESTORE_INDEX_ERRORS);
}
}

} // NCloud::NFileStore::NUserCounter
196 changes: 2 additions & 194 deletions cloud/filestore/libs/diagnostics/user_counter_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <library/cpp/monlib/encode/json/json.h>
#include <library/cpp/monlib/encode/spack/spack_v1.h>
#include <library/cpp/monlib/encode/text/text.h>
#include <library/cpp/resource/resource.h>
#include <library/cpp/testing/unittest/registar.h>

namespace NCloud::NFileStore::NUserCounter {
Expand Down Expand Up @@ -120,200 +121,7 @@ Y_UNIT_TEST_SUITE(TUserWrapperTest)
const TString fsId = "test_fs";
const TString clientId = "test_client";

const TString testResult = R"--({
"sensors":[
{
"kind":"GAUGE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_bytes_burst"
},
"ts":12,
"value":0
},
{
"kind":"GAUGE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_ops_burst"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_ops"
},
"ts":12,
"value":0
},
{
"kind":"HIST_RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_latency"
},
"ts":12,
"hist":{
"bounds":[1,2,5,10,20,50,100,200,500,1000,2000,5000,10000,35000],
"buckets":[0,0,0,0,0,0,0,0,0,0,0,0,0,0],
"inf":0
}
},
{
"kind":"HIST_RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_latency"
},
"ts":12,
"hist":{
"bounds":[1,2,5,10,20,50,100,200,500,1000,2000,5000,10000,35000],
"buckets":[0,0,0,0,0,0,0,0,0,0,0,0,0,0],
"inf":0
}
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.index_errors"
},
"ts":12,
"value":0
},
{
"kind":"GAUGE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_bytes_burst"
},
"ts":12,
"value":0
},
{
"kind":"GAUGE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_ops_burst"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_errors"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_bytes"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.index_ops"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_ops"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.write_errors"
},
"ts":12,
"value":0
},
{
"kind":"RATE",
"labels":{
"service":"compute",
"project":"cloud",
"cluster":"folder",
"filestore":"test_fs",
"instance":"test_client",
"name":"filestore.read_bytes"
},
"ts":12,
"value":0
}
]
})--";
const TString testResult = NResource::Find("counters.json");
auto testJson = NJson::ReadJsonFastTree(testResult, true);
auto emptyJson = NJson::ReadJsonFastTree("{}", true);

Expand Down
Loading

0 comments on commit fbf6b9f

Please sign in to comment.