diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 613a380b40..f822f832ce 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -35,6 +35,10 @@ Use subheadings with the "=====" level for adding notes for unreleased changes: ===== Features * Added a configuration option to use queues in names of spring-rabbit transactions - {pull}3424[#3424] +[float] +===== Features +* Add option to retry JMX metrics capture in case of exception - {pull}3511[#3511] + [float] ===== Bug fixes * Add support to CLI attach download for new agent signature for 1.46.0+ - {pull}3513[#3513] diff --git a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java index e1026b4e8c..61f6212efc 100644 --- a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java +++ b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java @@ -18,12 +18,16 @@ */ package co.elastic.apm.agent.jmx; +import co.elastic.apm.agent.tracer.configuration.TimeDuration; +import co.elastic.apm.agent.tracer.configuration.TimeDurationValueConverter; import org.stagemonitor.configuration.ConfigurationOption; import org.stagemonitor.configuration.ConfigurationOptionProvider; import java.util.Collections; import java.util.List; +import static co.elastic.apm.agent.tracer.configuration.RangeValidator.isNotInRange; + public class JmxConfiguration extends ConfigurationOptionProvider { private ConfigurationOption> captureJmxMetrics = ConfigurationOption.>builder(JmxMetric.TokenValueConverter.INSTANCE, List.class) @@ -137,4 +141,15 @@ public class JmxConfiguration extends ConfigurationOptionProvider { ConfigurationOption> getCaptureJmxMetrics() { return captureJmxMetrics; } + + private final ConfigurationOption faildRetryInterval = TimeDurationValueConverter.durationOption("m") + .key("jmx_failed_retry_interval") + .tags("internal") + .description("If set to a value greater or equal to 1m, the agent will retry failed JMX metric registrations.") + .addValidator(isNotInRange(TimeDuration.of("1ms"), TimeDuration.of("59s"))) + .buildWithDefault(TimeDuration.of("0m")); + + public ConfigurationOption getFaildRetryInterval() { + return faildRetryInterval; + } } diff --git a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java index 685eb59992..5560f338a1 100644 --- a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java +++ b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java @@ -23,10 +23,12 @@ import co.elastic.apm.agent.metrics.DoubleSupplier; import co.elastic.apm.agent.metrics.Labels; import co.elastic.apm.agent.metrics.MetricRegistry; -import co.elastic.apm.agent.tracer.GlobalLocks; +import co.elastic.apm.agent.sdk.internal.util.ExecutorUtils; +import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils; import co.elastic.apm.agent.sdk.logging.Logger; import co.elastic.apm.agent.sdk.logging.LoggerFactory; -import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils; +import co.elastic.apm.agent.tracer.GlobalLocks; +import co.elastic.apm.agent.tracer.configuration.TimeDuration; import org.stagemonitor.configuration.ConfigurationOption; import javax.annotation.Nullable; @@ -54,6 +56,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; public class JmxMetricTracker extends AbstractLifecycleListener { @@ -68,9 +71,17 @@ public class JmxMetricTracker extends AbstractLifecycleListener { @Nullable private volatile NotificationListener listener; + private final List failedMetrics; + + @Nullable + private ScheduledExecutorService retryExecutor; + public JmxMetricTracker(ElasticApmTracer tracer) { jmxConfiguration = tracer.getConfig(JmxConfiguration.class); metricRegistry = tracer.getMetricRegistry(); + + // using a synchronized list so adding to the list does not require synchronization + failedMetrics = Collections.synchronizedList(new ArrayList()); } @Override @@ -175,8 +186,15 @@ synchronized void init(final MBeanServer platformMBeanServer) { jmxConfiguration.getCaptureJmxMetrics().addChangeListener(new ConfigurationOption.ChangeListener>() { @Override public void onChange(ConfigurationOption configurationOption, List oldValue, List newValue) { - List oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer); - List newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer); + List registrationErrors = new ArrayList(); // those are not needed + List oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer, registrationErrors); + + List newRegistrations; + synchronized (failedMetrics) { + failedMetrics.clear(); + newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer, failedMetrics); + } + for (JmxMetricRegistration addedRegistration : removeAll(oldRegistrations, newRegistrations)) { addedRegistration.register(platformMBeanServer, metricRegistry); @@ -184,10 +202,36 @@ public void onChange(ConfigurationOption configurationOption, List for (JmxMetricRegistration deletedRegistration : removeAll(newRegistrations, oldRegistrations)) { deletedRegistration.unregister(metricRegistry); } - } }); - register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer); + + ConfigurationOption failedRetryConfig = jmxConfiguration.getFaildRetryInterval(); + if (!failedRetryConfig.isDefault()) { + long retryMillis = failedRetryConfig.getValue().getMillis(); + if (retryExecutor != null) { + ExecutorUtils.shutdownAndWaitTermination(retryExecutor); + } + + retryExecutor = ExecutorUtils.createSingleThreadSchedulingDaemonPool("jmx-retry"); + retryExecutor.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + retryFailedJmx(platformMBeanServer); + } + }, retryMillis, retryMillis, TimeUnit.MILLISECONDS); + } + + register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer, failedMetrics); + } + + // package-private for testing + void retryFailedJmx(MBeanServer platformMBeanServer) { + List failed = JmxMetricTracker.this.failedMetrics; + synchronized (failed) { + List toRetry = new ArrayList<>(failed); + failed.clear(); + register(toRetry, platformMBeanServer, failed); + } } private void registerMBeanNotificationListener(final MBeanServer server) { @@ -217,7 +261,7 @@ private void addMBean(ObjectName mBeanName, JmxMetric jmxMetric) { ObjectName metricName = jmxMetric.getObjectName(); if (metricName.apply(mBeanName) || matchesJbossStatisticsPool(mBeanName, metricName, server)) { logger.debug("MBean added at runtime: {}", jmxMetric.getObjectName()); - register(Collections.singletonList(jmxMetric), server); + register(Collections.singletonList(jmxMetric), server, failedMetrics); } } @@ -280,28 +324,36 @@ private static List removeAll(List removeFromThis, List toRemove) { return result; } - private void register(List jmxMetrics, MBeanServer server) { - for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server)) { + private void register(List jmxMetrics, MBeanServer server, List failedMetrics) { + for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server, failedMetrics)) { registration.register(server, metricRegistry); } } /** * A single {@link JmxMetric} can yield multiple {@link JmxMetricRegistration}s if the {@link JmxMetric} contains multiple attributes + * + * @param jmxMetrics JMX metrics to register + * @param server MBean server + * @param failedMetrics list of JMX metrics that failed to register (out) */ - private List compileJmxMetricRegistrations(List jmxMetrics, MBeanServer server) { - List registrations = new ArrayList<>(); + private List compileJmxMetricRegistrations(List jmxMetrics, MBeanServer server, List failedMetrics) { + List globalRegistrations = new ArrayList<>(); for (JmxMetric jmxMetric : jmxMetrics) { + List metricRegistrations = new ArrayList<>(); try { - addJmxMetricRegistration(jmxMetric, registrations, server); + addJmxMetricRegistration(jmxMetric, metricRegistrations, server); + globalRegistrations.addAll(metricRegistrations); } catch (Exception e) { + failedMetrics.add(jmxMetric); logger.error("Failed to register JMX metric {}", jmxMetric.toString(), e); } + } - return registrations; + return globalRegistrations; } - private static void addJmxMetricRegistration(final JmxMetric jmxMetric, List registrations, MBeanServer server) throws JMException { + private void addJmxMetricRegistration(final JmxMetric jmxMetric, List registrations, MBeanServer server) throws JMException { Set mbeans = server.queryMBeans(jmxMetric.getObjectName(), null); if (!mbeans.isEmpty()) { logger.debug("Found mbeans for object name {}", jmxMetric.getObjectName()); @@ -355,20 +407,21 @@ private static String metricPrepend(Labels labels) { return ""; } - private static void addJmxMetricRegistration(JmxMetric jmxMetric, List registrations, ObjectName objectName, Object value, JmxMetric.Attribute attribute, String attributeName, String metricPrepend) throws AttributeNotFoundException { + private void addJmxMetricRegistration(JmxMetric jmxMetric, List registrations, ObjectName objectName, Object value, JmxMetric.Attribute attribute, String attributeName, @Nullable String metricPrepend) throws AttributeNotFoundException { + String effectiveAttributeName = metricPrepend == null ? attributeName : metricPrepend + attributeName; + boolean unsubscribeOnError = jmxConfiguration.getFaildRetryInterval().isDefault(); if (value instanceof Number) { logger.debug("Found number attribute {}={}", attribute.getJmxAttributeName(), value); registrations.add( new JmxMetricRegistration( attribute.getMetricName( - metricPrepend == null ? - attributeName : - metricPrepend + attributeName + effectiveAttributeName ), attribute.getLabels(objectName), attributeName, null, - objectName + objectName, + unsubscribeOnError ) ); } else if (value instanceof CompositeData) { @@ -380,14 +433,12 @@ private static void addJmxMetricRegistration(JmxMetric jmxMetric, List toUnregister; + private JmxMetricTracker jmxTracker; + private MBeanServer mbeanServer; @BeforeEach void setUp() { tracer = MockTracer.createRealTracer(); metricRegistry = tracer.getMetricRegistry(); config = tracer.getConfig(JmxConfiguration.class); - tracer.getLifecycleListener(JmxMetricTracker.class).init(ManagementFactory.getPlatformMBeanServer()); + jmxTracker = tracer.getLifecycleListener(JmxMetricTracker.class); + mbeanServer = ManagementFactory.getPlatformMBeanServer(); + jmxTracker.init(mbeanServer); + toUnregister = new ArrayList<>(); } @AfterEach void cleanup() { tracer.stop(); + for (ObjectName name : toUnregister) { + try { + mbeanServer.unregisterMBean(name); + } catch (Exception e) { + // silently ignored + } + } + toUnregister.clear(); + } + + private void registerMBean(Object object, ObjectName objectName) throws Exception { + toUnregister.add(objectName); + mbeanServer.registerMBean(object, objectName); } @Test @@ -64,10 +87,11 @@ void testAvailableProcessors() throws Exception { @Test void testHeap() throws Exception { setConfig(JmxMetric.valueOf("object_name[java.lang:type=Memory] attribute[HeapMemoryUsage:metric_name=heap]")); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.committed", Labels.Mutable.of("type", "Memory"))).isPositive(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.init", Labels.Mutable.of("type", "Memory"))).isPositive(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.used", Labels.Mutable.of("type", "Memory"))).isPositive(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.max", Labels.Mutable.of("type", "Memory"))).isPositive(); + Labels.Mutable labels = Labels.Mutable.of("type", "Memory"); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.committed", labels)).isPositive(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.init", labels)).isPositive(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.used", labels)).isPositive(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.heap.max", labels)).isPositive(); printMetricSets(); } @@ -77,8 +101,8 @@ void testGC() throws Exception { setConfig(JmxMetric.valueOf("object_name[java.lang:type=GarbageCollector,name=*] attribute[CollectionCount:metric_name=collection_count] attribute[CollectionTime]")); for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) { String memoryManagerName = gcBean.getName(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", getGcLabels(memoryManagerName))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", getGcLabels(memoryManagerName))).isNotNegative(); } printMetricSets(); } @@ -88,8 +112,8 @@ void testAttributeWildcard() throws Exception { setConfig(JmxMetric.valueOf("object_name[java.lang:type=GarbageCollector,name=*] attribute[*]")); for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) { String memoryManagerName = gcBean.getName(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", getGcLabels(memoryManagerName))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", getGcLabels(memoryManagerName))).isNotNegative(); } printMetricSets(); } @@ -99,63 +123,87 @@ void testRemoveMetric() throws Exception { setConfig(JmxMetric.valueOf("object_name[java.lang:type=GarbageCollector,name=*] attribute[CollectionCount:metric_name=collection_count] attribute[CollectionTime]")); for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) { String memoryManagerName = gcBean.getName(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", getGcLabels(memoryManagerName))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", getGcLabels(memoryManagerName))).isNotNegative(); } setConfig(JmxMetric.valueOf("object_name[java.lang:type=GarbageCollector,name=*] attribute[CollectionCount]")); for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) { String memoryManagerName = gcBean.getName(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionCount", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNotNegative(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNaN(); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"))).isNaN(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionCount", getGcLabels(memoryManagerName))).isNotNegative(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.collection_count", getGcLabels(memoryManagerName))).isNaN(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.CollectionTime", getGcLabels(memoryManagerName))).isNaN(); } } + private static Labels getGcLabels(String memoryManagerName) { + return Labels.Mutable.of("name", memoryManagerName).add("type", "GarbageCollector"); + } + @Test void testMBeanAddedLater() throws Exception { setConfig(JmxMetric.valueOf("object_name[foo:type=Bar] attribute[Baz]")); ObjectName objectName = new ObjectName("foo:type=Bar"); - ManagementFactory.getPlatformMBeanServer().registerMBean(new TestMetric(), objectName); - try { - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("type", "Bar"))).isEqualTo(42); - } finally { - ManagementFactory.getPlatformMBeanServer().unregisterMBean(objectName); - } + registerMBean(new TestMetric(), objectName); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("type", "Bar"))).isEqualTo(42); } @Test void testMBeanMatchingWildcardAddedLater() throws Exception { ObjectName objectName = new ObjectName("foo:type=Foo,name=mbean1"); ObjectName objectName2 = new ObjectName("foo:type=Foo,name=mbean2"); - try { - ManagementFactory.getPlatformMBeanServer().registerMBean(new TestMetric(), objectName); - setConfig(JmxMetric.valueOf("object_name[foo:type=Foo,name=*] attribute[Baz]")); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "mbean1").add("type", "Foo"))).isEqualTo(42); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "mbean2").add("type", "Foo"))).isNaN(); - - ManagementFactory.getPlatformMBeanServer().registerMBean(new TestMetric(), objectName2); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "mbean1").add("type", "Foo"))).isEqualTo(42); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "mbean2").add("type", "Foo"))).isEqualTo(42); - } finally { - ManagementFactory.getPlatformMBeanServer().unregisterMBean(objectName); - ManagementFactory.getPlatformMBeanServer().unregisterMBean(objectName2); - } + Labels labels1 = Labels.Mutable.of("name", "mbean1").add("type", "Foo"); + Labels labels2 = Labels.Mutable.of("name", "mbean2").add("type", "Foo"); + + registerMBean(new TestMetric(), objectName); + setConfig(JmxMetric.valueOf("object_name[foo:type=Foo,name=*] attribute[Baz]")); + + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels1)).isEqualTo(42); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels2)).isNaN(); + + registerMBean(new TestMetric(), objectName2); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels1)).isEqualTo(42); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels2)).isEqualTo(42); } @Test void testMBeanUnregister() throws Exception { ObjectName objectName = new ObjectName("foo:type=Foo,name=testMBeanUnregister"); - ManagementFactory.getPlatformMBeanServer().registerMBean(new TestMetric(), objectName); - try { - setConfig(JmxMetric.valueOf("object_name[foo:type=Foo,name=*] attribute[Baz]")); - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "testMBeanUnregister").add("type", "Foo"))).isEqualTo(42); - } finally { - ManagementFactory.getPlatformMBeanServer().unregisterMBean(objectName); - } + registerMBean(new TestMetric(), objectName); + setConfig(JmxMetric.valueOf("object_name[foo:type=Foo,name=*] attribute[Baz]")); + Labels labels = Labels.Mutable.of("name", "testMBeanUnregister").add("type", "Foo"); + + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels)).isEqualTo(42); + + mbeanServer.unregisterMBean(objectName); // trying to get a non-existing MBean metric value will unregister it - assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", Labels.Mutable.of("name", "testMBeanUnregister").add("type", "Foo"))).isNaN(); - assertThat(metricRegistry.getGauge("jvm.jmx.Baz", Labels.Mutable.of("name", "testMBeanUnregister").add("type", "Foo"))).isNull(); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels)).isNaN(); + assertThat(metricRegistry.getGauge("jvm.jmx.Baz", labels)).isNull(); + } + + @Test + void testMBeanExceptionWhenRegisteredThenOk() throws Exception { + ObjectName objectName = new ObjectName("foo:type=Foo,name=testMBeanExceptionWhenRegisteredThenOk"); + TestMetric testMetric = new TestMetric(); + testMetric.setValue(-1); + assertThatThrownBy(testMetric::getBaz).isInstanceOf(RuntimeException.class); + + setConfig(JmxMetric.valueOf("object_name[foo:type=Foo,name=*] attribute[Baz]")); + + registerMBean(testMetric, objectName); + + Labels labels = Labels.Mutable.of("name", "testMBeanExceptionWhenRegisteredThenOk").add("type", "Foo"); + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels)) + .isNaN(); + assertThat(metricRegistry.getGauge("jvm.jmx.Baz", labels)) + .isNull(); + + testMetric.setValue(37); + // calling directly the JMX tracker to avoid testing async execution + jmxTracker.retryFailedJmx(mbeanServer); + + assertThat(metricRegistry.getGaugeValue("jvm.jmx.Baz", labels)) + .isEqualTo(37); } public interface TestMetricMBean { @@ -164,9 +212,22 @@ public interface TestMetricMBean { public static class TestMetric implements TestMetricMBean { + private int value; + + public TestMetric() { + this.value = 42; + } + + void setValue(int value) { + this.value = value; + } + @Override public int getBaz() { - return 42; + if (value < 0) { + throw new RuntimeException("value less than zero"); + } + return value; } }