-
Notifications
You must be signed in to change notification settings - Fork 23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add lagging agent concept to the volume #2524
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,11 +134,12 @@ struct TTestEnv | |
// only SSD/HDD distinction matters | ||
NProto::STORAGE_MEDIA_SSD_NONREPLICATED}, | ||
VolumeActorId, | ||
false, // muteIOErrors | ||
THashSet<TString>(), // freshDeviceIds | ||
TDuration::Zero(), // maxTimedOutDeviceStateDuration | ||
false, // maxTimedOutDeviceStateDurationOverridden | ||
true // useSimpleMigrationBandwidthLimiter | ||
false, // muteIOErrors | ||
THashSet<TString>(), // freshDeviceIds | ||
THashSet<TString>(), // laggingDeviceIds | ||
TDuration::Zero(), // maxTimedOutDeviceStateDuration | ||
false, // maxTimedOutDeviceStateDurationOverridden | ||
true // useSimpleMigrationBandwidthLimiter | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. clang-format There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Он так и сделал |
||
); | ||
|
||
auto part = std::make_unique<TNonreplicatedPartitionActor>( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,6 +278,9 @@ message TDiskConfig | |
|
||
// A log of important events in the life of this disk. | ||
repeated TDiskHistoryItem History = 17; | ||
|
||
// A list of devices that are lagging behind on writes. | ||
repeated TLaggingDevice LaggingDevices = 18; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
@@ -417,6 +420,44 @@ message TAgentStats | |
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
||
// Describes a single device that is lagging behind on writes. | ||
message TLaggingDevice | ||
{ | ||
// UUID of the lagging device. | ||
string DeviceUUID = 1; | ||
|
||
// Index (row) of the lagging device within its replica. | ||
uint32 RowIndex = 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. может удобнее тут иметь индекс реплики ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Я чет не придумал где оно сильно пригодиться может |
||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
||
// Describes an agent that is lagging behind on writes, together with the | ||
// devices that belong to it. | ||
message TLaggingAgent | ||
{ | ||
// Identifier of the agent. | ||
string AgentId = 1; | ||
|
||
// Id of the node that the agent is running on. | ||
uint32 NodeId = 2; | ||
|
||
// Index of the mirror disk replica. | ||
// 0 - main devices | ||
// 1,2 - replica devices | ||
uint32 ReplicaIndex = 3; | ||
|
||
// A list of devices that belong to the agent. | ||
repeated TLaggingDevice Devices = 4; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
||
// Container for the list of lagging agents. | ||
message TLaggingAgentsInfo | ||
{ | ||
// A list of agents that are lagging behind on writes. | ||
repeated TLaggingAgent Agents = 1; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
||
message TDiskRegistryAgentListRequestParams | ||
{ | ||
repeated string AgentIds = 1; | ||
|
@@ -635,6 +676,9 @@ message TAllocateDiskResponse | |
|
||
// New devices used instead of recently replaced ones. | ||
repeated string DeviceReplacementUUIDs = 8; | ||
|
||
// Devices that had been lagging. | ||
repeated TLaggingDevice RemovedLaggingDevices = 9; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
|
@@ -1662,6 +1706,27 @@ message TGetAgentNodeIdResponse | |
bool Connected = 4; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
// Report that some of the devices were lagging. | ||
|
||
// Request that reports lagging devices for the given disk. | ||
message TAddLaggingDevicesRequest | ||
{ | ||
// Optional request headers. | ||
THeaders Headers = 1; | ||
|
||
// Disk identifier to perform operations on. | ||
string DiskId = 2; | ||
|
||
// Devices that have been lagging. | ||
repeated TLaggingDevice LaggingDevices = 3; | ||
} | ||
|
||
// Response to a lagging devices report; carries only an optional error. | ||
message TAddLaggingDevicesResponse | ||
{ | ||
// Optional error, set only if error happened. | ||
NCloud.NProto.TError Error = 1; | ||
} | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
// Get dependent disks | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Добавил в partition config список девайсов которые отстают.
Будет использоваться для двух вещей: mirror partition не читает из таких из-за
DevicesReadyForReading()
и в nonrepl partition тоже будет предохранитель "на всякий случай" от наших сервисных запросов.