openucx · rakhmets · Dec 30, 2024 · Dec 30, 2024 · Dec 30, 2024 · iyastreb
diff --git a/src/ucp/proto/proto_perf.c b/src/ucp/proto/proto_perf.c
@@ -430,6 +430,45 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
     return ucp_proto_perf_aggregate(name, perf_elems, 2, perf_p);
 }
 
+/*
+ * Apply a relative bias to all segments of 'perf', in place.
+ *
+ * Every performance factor function of every segment is composed with the
+ * linear function built by ucs_linear_func_make(0.0, 1.0 - bias). The factors
+ * of the segment node are then updated, and a "bias" data node is attached
+ * to the segment node.
+ *
+ * @param perf  Performance data structure to update.
+ * @param bias  Bias as a fraction (0.1 is recorded in the "bias" node as
+ *              10 percent). If its absolute value does not exceed
+ *              UCP_PROTO_PERF_EPSILON, 'perf' is left untouched.
+ */
+void ucp_proto_perf_apply_bias(ucp_proto_perf_t *perf, double bias)
+{
+    ucs_linear_func_t bias_func = ucs_linear_func_make(0.0, 1.0 - bias);
+    ucp_proto_perf_node_t *bias_node;
+    ucp_proto_perf_factor_id_t fid;
+    ucp_proto_perf_segment_t *seg;
+
+    /* A negligible bias changes nothing, so do not add "bias" nodes for it */
+    if (fabs(bias) <= UCP_PROTO_PERF_EPSILON) {
+        return;
+    }
+
+    ucp_proto_perf_segment_foreach(seg, perf) {
+        for (fid = 0; fid < UCP_PROTO_PERF_FACTOR_LAST; ++fid) {
+            seg->perf_factors[fid] =
+                    ucs_linear_func_compose(bias_func, seg->perf_factors[fid]);
+        }
+        ucp_proto_perf_node_update_factors(seg->node, seg->perf_factors);
+
+        /* 'bias' is a fraction; scale it by 100 to match the "%" unit */
+        bias_node = ucp_proto_perf_node_new_data("bias", "%.2f %%",
+                                                 bias * 100.0);
+        ucp_proto_perf_node_own_child(seg->node, &bias_node);
+    }
+}
+
 /* TODO:
  * Reconsider correctness of PPLN perf estimation logic since in case of async
  * operations it seems wrong to choose the longest factor without paying

diff --git a/src/ucp/proto/proto_perf.h b/src/ucp/proto/proto_perf.h
@@ -159,6 +159,16 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
                                        ucp_proto_perf_t **perf_p);
 
 
+/**
+ * Scale every performance factor of every segment of @a perf by (1 - @a bias).
+ *
+ * @param [in] perf         Performance data structure to update in place.
+ * @param [in] bias         Bias to apply. A bias equal to 0.1 indicates a 10%
+ *                          performance improvement; a zero bias is a no-op.
+ */
+void ucp_proto_perf_apply_bias(ucp_proto_perf_t *perf, double bias);
+
+
 /**
  * Expand given perf by estimation that all messages on interval
  * [end of @a frag_seg + 1, @a max_length] would be sent in a pipeline async

diff --git a/src/ucp/rndv/proto_rndv.c b/src/ucp/rndv/proto_rndv.c
@@ -403,6 +403,7 @@ static void ucp_proto_rndv_ctrl_variant_probe(
         cfg_thresh = remote_proto->cfg_thresh;
     }
 
+    ucp_proto_perf_apply_bias(perf, params->perf_bias);
     ucp_proto_select_add_proto(&params->super.super, cfg_thresh, cfg_priority,
                                perf, rpriv, priv_size);
 

diff --git a/test/gtest/ucp/test_ucp_proto.cc b/test/gtest/ucp/test_ucp_proto.cc
@@ -717,6 +717,45 @@ UCS_TEST_F(test_proto_perf, intersect_first)
     expect_empty_range(5000, SIZE_MAX);
 }
 
+UCS_TEST_F(test_proto_perf, apply_zero_bias) {
+    const double no_bias = 0.0;
+    m_perf = create();
+    add_func(0, SIZE_MAX, UCP_PROTO_PERF_FACTOR_LOCAL_TL, local_tl_func);
+    add_func(0, SIZE_MAX, UCP_PROTO_PERF_FACTOR_REMOTE_TL, remote_tl_func);
+
+    /* The expectation below is that a zero bias changes neither function */
+    ucp_proto_perf_apply_bias(m_perf.get(), no_bias);
+    make_flat_perf();
+    print_perf();
+
+    expect_perf(0, SIZE_MAX,
+                {{UCP_PROTO_PERF_FACTOR_LOCAL_TL, local_tl_func},
+                 {UCP_PROTO_PERF_FACTOR_REMOTE_TL, remote_tl_func}});
+}
+
+UCS_TEST_F(test_proto_perf, apply_bias) {
+    /* 0.1 stands for a 10% bias */
+    const double bias = 0.1;
+
+    m_perf = create();
+    add_func(0, SIZE_MAX, UCP_PROTO_PERF_FACTOR_LOCAL_TL, local_tl_func);
+    add_func(0, SIZE_MAX, UCP_PROTO_PERF_FACTOR_REMOTE_TL, remote_tl_func);
+
+    ucp_proto_perf_apply_bias(m_perf.get(), bias);
+
+    make_flat_perf();
+    print_perf();
+
+    /* Expected: the original functions composed with the bias function */
+    auto bias_fn       = ucs_linear_func_make(0, 1 - bias);
+    auto local_tl_exp  = ucs_linear_func_compose(bias_fn, local_tl_func);
+    auto remote_tl_exp = ucs_linear_func_compose(bias_fn, remote_tl_func);
+
+    expect_perf(0, SIZE_MAX,
+                {{UCP_PROTO_PERF_FACTOR_LOCAL_TL, local_tl_exp},
+                 {UCP_PROTO_PERF_FACTOR_REMOTE_TL, remote_tl_exp}});
+}
+
 UCS_TEST_F(test_proto_perf, intersect_last)
 {
     /*