Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A0-3022: Abft Score Metrics #1898

Merged
merged 7 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions bin/chain-bootstrapper/src/chain_spec/cli.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use primitives::{AccountId, Version as FinalityVersion, LEGACY_FINALITY_VERSION};
use primitives::{
AccountId, Version as FinalityVersion, CURRENT_FINALITY_VERSION, LEGACY_FINALITY_VERSION,
};
use sc_chain_spec::ChainType;
use sc_cli::clap::{self, Args};

Expand Down Expand Up @@ -43,8 +45,8 @@ pub struct ChainSpecParams {
rich_account_ids: Option<Vec<AccountId>>,

/// Finality version at chain inception.
#[arg(long, default_value = LEGACY_FINALITY_VERSION.to_string())]
finality_version: FinalityVersion,
#[arg(long, default_value = "legacy")]
finality_version: String,
}

impl ChainSpecParams {
Expand Down Expand Up @@ -81,6 +83,11 @@ impl ChainSpecParams {
}

pub fn finality_version(&self) -> FinalityVersion {
self.finality_version
match self.finality_version.as_str() {
"current" => CURRENT_FINALITY_VERSION,
"legacy" => LEGACY_FINALITY_VERSION,
_ => panic!("finality version should be 'current' or 'legacy'"),
}
.into()
}
}
8 changes: 8 additions & 0 deletions finality-aleph/src/abft/current/performance/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::{
LOG_TARGET,
},
data_io::AlephData,
metrics::ScoreMetrics,
party::manager::Runnable,
Hasher, UnverifiedHeader,
};
Expand Down Expand Up @@ -62,8 +63,10 @@ pub struct Service<UH>
where
UH: UnverifiedHeader,
{
my_index: usize,
timorleph marked this conversation as resolved.
Show resolved Hide resolved
batches_from_abft: mpsc::UnboundedReceiver<Batch<UH>>,
scorer: Scorer,
metrics: ScoreMetrics,
}

impl<UH> Service<UH>
Expand All @@ -73,8 +76,10 @@ where
/// Create a new service, together with a unit finalization handler that should be passed to
/// ABFT. It will wrap the provided finalization handler and call it in the background.
pub fn new<FH>(
my_index: usize,
n_members: usize,
finalization_handler: FH,
metrics: ScoreMetrics,
) -> (
Self,
impl current_aleph_bft::UnitFinalizationHandler<Data = AlephData<UH>, Hasher = Hasher>,
Expand All @@ -85,8 +90,10 @@ where
let (batches_for_us, batches_from_abft) = mpsc::unbounded();
(
Service {
my_index,
batches_from_abft,
scorer: Scorer::new(NodeCount(n_members)),
metrics,
},
FinalizationWrapper::new(finalization_handler, batches_for_us),
)
Expand All @@ -110,6 +117,7 @@ where
},
};
debug!(target: LOG_TARGET, "Received ABFT score: {:?}.", score);
self.metrics.report_score(score[self.my_index]);
// TODO(A0-4339): sometimes submit these scores to the chain.
}
_ = &mut exit => {
Expand Down
31 changes: 31 additions & 0 deletions finality-aleph/src/metrics/abft_score.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use substrate_prometheus_endpoint::{register, Gauge, PrometheusError, Registry, U64};

#[derive(Clone)]
pub enum ScoreMetrics {
Prometheus { my_score: Gauge<U64> },
Noop,
}

impl ScoreMetrics {
pub fn new(registry: Option<Registry>) -> Result<Self, PrometheusError> {
match registry {
Some(registry) => Ok(ScoreMetrics::Prometheus {
my_score: register(
Gauge::new("my_abft_score", "My abft score observed in last batch")?,
&registry,
)?,
}),
None => Ok(ScoreMetrics::Noop),
}
}

pub fn noop() -> Self {
ScoreMetrics::Noop
}

pub fn report_score(&self, score: u16) {
if let ScoreMetrics::Prometheus { my_score } = self {
my_score.set(score.into());
}
}
}
2 changes: 2 additions & 0 deletions finality-aleph/src/metrics/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod abft_score;
mod best_block;
mod finality_rate;
mod slo;
mod timing;
pub mod transaction_pool;

pub use abft_score::ScoreMetrics;
pub use slo::{run_metrics_service, SloMetrics};
pub use timing::{Checkpoint, DefaultClock};
pub type TimingBlockMetrics = timing::TimingBlockMetrics<DefaultClock>;
Expand Down
10 changes: 8 additions & 2 deletions finality-aleph/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
crypto::AuthorityPen,
finalization::AlephFinalizer,
idx_to_account::ValidatorIndexToAccountIdConverterImpl,
metrics::{run_metrics_service, SloMetrics},
metrics::{run_metrics_service, ScoreMetrics, SloMetrics},
network::{
address_cache::validator_address_cache_updater,
session::{ConnectionManager, ConnectionManagerConfig},
Expand Down Expand Up @@ -146,6 +146,11 @@ where

let chain_events = client.chain_status_notifier();

let score_metrics = ScoreMetrics::new(registry.clone()).unwrap_or_else(|e| {
debug!(target: LOG_TARGET, "Failed to create metrics: {}.", e);
ScoreMetrics::noop()
});

let slo_metrics = SloMetrics::new(registry.as_ref(), chain_status.clone());
let timing_metrics = slo_metrics.timing_metrics().clone();

Expand Down Expand Up @@ -199,7 +204,7 @@ where
verifier.clone(),
session_info.clone(),
sync_io,
registry.clone(),
registry,
slo_metrics,
favourite_block_user_requests,
) {
Expand Down Expand Up @@ -266,6 +271,7 @@ where
spawn_handle,
connection_manager,
keystore,
score_metrics,
),
session_info,
});
Expand Down
28 changes: 18 additions & 10 deletions finality-aleph/src/party/manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
},
crypto::{AuthorityPen, AuthorityVerifier},
data_io::{ChainTracker, DataStore, OrderedDataInterpreter, SubstrateChainInfoProvider},
metrics::TimingBlockMetrics,
metrics::{ScoreMetrics, TimingBlockMetrics},
mpsc,
network::{
data::{
Expand All @@ -32,6 +32,7 @@ use crate::{
},
party::{
backup::ABFTBackup, manager::aggregator::AggregatorVersion, traits::NodeSessionManager,
LOG_TARGET,
},
sync::JustificationSubmissions,
AuthorityId, BlockId, CurrentRmcNetworkData, Keychain, LegacyRmcNetworkData, NodeIndex,
Expand Down Expand Up @@ -114,6 +115,7 @@ where
spawn_handle: SpawnHandle,
session_manager: SM,
keystore: Arc<LocalKeystore>,
score_metrics: ScoreMetrics,
_phantom: PhantomData<(B, H)>,
}

Expand Down Expand Up @@ -146,6 +148,7 @@ where
spawn_handle: SpawnHandle,
session_manager: SM,
keystore: Arc<LocalKeystore>,
score_metrics: ScoreMetrics,
) -> Self {
Self {
client,
Expand All @@ -161,6 +164,7 @@ where
spawn_handle,
session_manager,
keystore,
score_metrics,
_phantom: PhantomData,
}
}
Expand Down Expand Up @@ -274,8 +278,12 @@ where
self.verifier.clone(),
session_boundaries.clone(),
);
let (abft_performance, abft_batch_handler) =
CurrentPerformanceService::new(n_members, ordered_data_interpreter);
let (abft_performance, abft_batch_handler) = CurrentPerformanceService::new(
node_id.into(),
n_members,
ordered_data_interpreter,
self.score_metrics.clone(),
);
let consensus_config =
current_create_aleph_config(n_members, node_id, session_id, self.unit_creation_delay);
let data_network = data_network.map();
Expand Down Expand Up @@ -325,7 +333,7 @@ where
exit_rx: oneshot::Receiver<()>,
backup: ABFTBackup,
) -> Subtasks {
debug!(target: "afa", "Authority task {:?}", session_id);
debug!(target: LOG_TARGET, "Authority task {:?}", session_id);

let authority_verifier = AuthorityVerifier::new(authorities.to_vec());
let authority_pen =
Expand Down Expand Up @@ -389,17 +397,17 @@ where
{
#[cfg(feature = "only_legacy")]
_ if self.only_legacy() => {
info!(target: "aleph-party", "Running session with legacy-only AlephBFT version.");
info!(target: LOG_TARGET, "Running session with legacy-only AlephBFT version.");
self.legacy_subtasks(params)
}
// The `as`es here should be removed, but this would require a pallet migration and I
// am lazy.
Ok(version) if version == CURRENT_VERSION as u32 => {
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is current.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is current.", version);
self.current_subtasks(params)
}
Ok(version) if version == LEGACY_VERSION as u32 => {
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is legacy.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is legacy.", version);
self.legacy_subtasks(params)
}
Ok(version) if version > CURRENT_VERSION as u32 => {
Expand All @@ -408,8 +416,8 @@ where
)
}
Ok(version) => {
info!(target: "aleph-party", "Attempting to run session with too old version {}, likely because we are synchronizing old sessions for which we have keys. This will not work, but it doesn't matter.", version);
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is legacy.", LEGACY_VERSION);
info!(target: LOG_TARGET, "Attempting to run session with too old version {}, likely because we are synchronizing old sessions for which we have keys. This will not work, but it doesn't matter.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is legacy.", LEGACY_VERSION);
self.legacy_subtasks(params)
}
_ => {
Expand Down Expand Up @@ -461,7 +469,7 @@ where
self.spawn_handle
.spawn_essential("aleph/session_authority", async move {
if subtasks.wait_completion().await.is_err() {
warn!(target: "aleph-party", "Authority subtasks failed.");
warn!(target: LOG_TARGET, "Authority subtasks failed.");
}
}),
node_id,
Expand Down
2 changes: 2 additions & 0 deletions finality-aleph/src/party/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub mod traits;
#[cfg(test)]
mod mocks;

const LOG_TARGET: &str = "aleph-party";

pub(crate) struct ConsensusPartyParams<CS, NSM> {
pub session_authorities: ReadOnlySessionMap,
pub chain_state: CS,
Expand Down
2 changes: 1 addition & 1 deletion primitives/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ pub const DEFAULT_FINALITY_VERSION: Version = 0;
pub const CURRENT_FINALITY_VERSION: u16 = LEGACY_FINALITY_VERSION + 1;

/// Legacy version of abft.
pub const LEGACY_FINALITY_VERSION: u16 = 3;
pub const LEGACY_FINALITY_VERSION: u16 = 4;

/// Percentage of validator performance that is treated as 100% performance
pub const LENIENT_THRESHOLD: Perquintill = Perquintill::from_percent(90);
Expand Down
13 changes: 12 additions & 1 deletion scripts/run_nodes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Usage:
[-p|--base-path BASE_PATH]
if specified, use given base path (keystore, db, AlephBFT backups)
if not specified, base path is ./run-nodes-local
[--finality-version]
which finality version should be used, default = legacy
[--dont-bootstrap]
set if you don't want to bootstrap chain, ie generate keystore and chainspec
[--dont-build]
Expand All @@ -85,6 +87,7 @@ DONT_BOOTSTRAP=${DONT_BOOTSTRAP:-""}
DONT_BUILD_ALEPH_NODE=${DONT_BUILD_ALEPH_NODE:-""}
DONT_DELETE_DB=${DONT_DELETE_DB:-""}
DONT_REMOVE_ABFT_BACKUPS=${DONT_REMOVE_ABFT_BACKUPS:-""}
FINALITY_VERSION=${FINALITY_VERSION:-"legacy"}

while [[ $# -gt 0 ]]; do
case "$1" in
Expand All @@ -100,6 +103,10 @@ while [[ $# -gt 0 ]]; do
BASE_PATH="$2"
shift;shift
;;
--finality-version)
FINALITY_VERSION="$2"
shift;shift
;;
--dont-bootstrap)
DONT_BOOTSTRAP="true"
shift
Expand Down Expand Up @@ -219,6 +226,9 @@ fi
if ! command -v jq &> /dev/null; then
error "jq could not be found on PATH!"
fi
if [[ "${FINALITY_VERSION}" != "current" && "${FINALITY_VERSION}" != "legacy" ]]; then
error "Flag finality-version should be either current or legacy."
timorleph marked this conversation as resolved.
Show resolved Hide resolved
fi

# ------------------- main script starts here ------------------------------

Expand Down Expand Up @@ -276,7 +286,8 @@ if [[ -z "${DONT_BOOTSTRAP}" ]]; then
--account-ids "${all_account_ids_string}" \
--authorities-account-ids "${validator_ids_string}" \
--chain-type local > "${BASE_PATH}/chainspec.json" \
--rich-account-ids "${all_account_ids_string}"
--rich-account-ids "${all_account_ids_string}" \
--finality-version "${FINALITY_VERSION}"

if [[ "${DONT_REMOVE_ABFT_BACKUPS}" == "true" ]]; then
all_account_ids=(${validator_account_ids[@]} ${rpc_node_account_ids[@]})
Expand Down