From 416948ccd73e5d8e2d31591a60724a22fe31a773 Mon Sep 17 00:00:00 2001 From: Milan Gallas Date: Tue, 7 Jan 2025 08:36:02 +0100 Subject: [PATCH] fix(Observe): missing traces in Mlflow Signed-off-by: Milan Gallas --- package.json | 3 +- pnpm-lock.yaml | 116 ++++++++++++++++++++++++++++++++++ src/opentelemetry.ts | 46 ++++++++------ src/runs/execution/execute.ts | 2 + src/server.ts | 1 + 5 files changed, 148 insertions(+), 20 deletions(-) diff --git a/package.json b/package.json index 9bdd7a2..c5a736b 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "type": "module", "scripts": { "build": "vite build && cp -R src/embedding/adapters/caikit/grpc/protos dist/embedding/adapters/caikit/grpc/protos", - "start": "node --enable-source-maps --experimental-loader=@opentelemetry/instrumentation/hook.mjs --import ./dist/opentelemetry.js ./dist/server.js", + "start": "node ./dist/server.js", "start:dev": "tsx watch src/server.ts | pino-pretty --singleLine", "start:dev:nowatch": "tsx src/server.ts | pino-pretty --singleLine", "start:dev:workers": "concurrently npm:start:dev:workers:*", @@ -43,6 +43,7 @@ "@mikro-orm/seeder": "6.2.9", "@opentelemetry/auto-instrumentations-node": "^0.54.0", "@opentelemetry/exporter-metrics-otlp-http": "^0.54.0", + "@opentelemetry/exporter-trace-otlp-proto": "^0.57.0", "@opentelemetry/instrumentation": "^0.54.0", "@opentelemetry/sdk-node": "^0.54.0", "@opentelemetry/semantic-conventions": "^1.27.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 883ee95..a24f042 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -62,6 +62,9 @@ importers: '@opentelemetry/exporter-metrics-otlp-http': specifier: ^0.54.0 version: 0.54.2(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-proto': + specifier: ^0.57.0 + version: 0.57.0(@opentelemetry/api@1.9.0) '@opentelemetry/instrumentation': specifier: ^0.54.0 version: 0.54.2(@opentelemetry/api@1.9.0) @@ -947,6 +950,10 @@ packages: resolution: {integrity: sha512-Wr39+94UNNG3Ei9nv3pHd4AJ63gq5nSemMRpCd8fPwDL9rN3vK26lzxfH27mw16XzOSO+TpyQwBAMaLxaPWG0g==} engines: {node: '>=14'} + '@opentelemetry/api-logs@0.57.0': + resolution: {integrity: sha512-l1aJ30CXeauVYaI+btiynHpw341LthkMTv3omi1VJDX14werY2Wmv9n1yudMsq9HuY0m8PvXEVX4d8zxEb+WRg==} + engines: {node: '>=14'} + '@opentelemetry/api@1.9.0': resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} engines: {node: '>=8.0.0'} @@ -987,6 +994,12 @@ packages: peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/core@1.30.0': + resolution: {integrity: sha512-Q/3u/K73KUjTCnFUP97ZY+pBjQ1kPEgjOfXj/bJl8zW7GbXdkw6cwuyZk6ZTXkVgCBsYRYUzx4fvYK1jxdb9MA==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/exporter-logs-otlp-grpc@0.54.2': resolution: {integrity: sha512-MQNmV5r96+5n3axLFgNYtVy62x8Ru7VERZH3zgC50KDcIKWCiQT3vHOtzakhzd1Wq0HqOgu6bzKdwzneSoDrEQ==} engines: {node: '>=14'} @@ -1065,6 +1078,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/exporter-trace-otlp-proto@0.57.0': + resolution: {integrity: sha512-M21nhl6JSAq8FTvs52/ISIvneRPg1uHNYk6q4YNNaEDGxz3GZZ6I6svYPZuQyL0O1c+mLkYNxzJ6p0rdS9/RUA==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/exporter-zipkin@1.27.0': resolution: {integrity: sha512-eGMY3s4QprspFZojqsuQyQpWNFpo+oNVE/aosTbtvAlrJBAlvXcwwsOROOHOd8Y9lkU4i0FpQW482rcXkgwCSw==} engines: {node: '>=14'} @@ -1335,6 +1354,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/otlp-exporter-base@0.57.0': + resolution: {integrity: sha512-QQl4Ngm3D6H8SDO0EM642ncTxjRsf/HDq7+IWIA0eaEK/NTsJeQ3iYJiZj3F4jkALnvyeM1kkwd+DHtqxTBx9Q==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/otlp-grpc-exporter-base@0.54.2': resolution: {integrity: sha512-HZtACQuLhgDcgNa9arGnVVGV28sSGQ+iwRgICWikFKiVxUsoWffqBvTxPa6G3DUTg5R+up97j/zxubEyxSAOHg==} engines: {node: '>=14'} @@ -1359,6 +1384,12 @@ packages: peerDependencies: '@opentelemetry/api': ^1.3.0 + '@opentelemetry/otlp-transformer@0.57.0': + resolution: {integrity: sha512-yHX7sdwkdAmSa6Jbi3caSLDWy0PCHS1pKQeKz8AIWSyQqL7IojHKgdk9A+7eRd98Z1n9YTdwWSWLnObvIqhEhQ==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + '@opentelemetry/propagation-utils@0.30.14': resolution: {integrity: sha512-RsdKGFd0PYG5Aop9aq8khYbR8Oq+lYTQBX/9/pk7b+8+0WwdFqrvGDmRxpBAH9hgIvtUgETeshlYctwjo2l9SQ==} engines: {node: '>=14'} @@ -1441,6 +1472,12 @@ packages: peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/resources@1.30.0': + resolution: {integrity: sha512-5mGMjL0Uld/99t7/pcd7CuVtJbkARckLVuiOX84nO8RtLtIz0/J6EOHM2TGvPZ6F4K+XjUq13gMx14w80SVCQg==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/sdk-logs@0.54.2': resolution: {integrity: sha512-yIbYqDLS/AtBbPjCjh6eSToGNRMqW2VR8RrKEy+G+J7dFG7pKoptTH5T+XlKPleP9NY8JZYIpgJBlI+Osi0rFw==} engines: {node: '>=14'} @@ -1453,6 +1490,12 @@ packages: peerDependencies: '@opentelemetry/api': '>=1.4.0 <1.10.0' + '@opentelemetry/sdk-logs@0.57.0': + resolution: {integrity: sha512-6Kbxdu/QE9LWH7+WSLmYo3DjAq+c55TiCLXiXu6b/2m2muy5SyOG2m0MrGqetyRpfYSSbIqHmJoqNVTN3+2a9g==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': '>=1.4.0 <1.10.0' + '@opentelemetry/sdk-metrics@1.27.0': resolution: {integrity: sha512-JzWgzlutoXCydhHWIbLg+r76m+m3ncqvkCcsswXAQ4gqKS+LOHKhq+t6fx1zNytvLuaOUBur7EvWxECc4jPQKg==} engines: {node: '>=14'} @@ -1465,6 +1508,12 @@ packages: peerDependencies: '@opentelemetry/api': '>=1.3.0 <1.10.0' + '@opentelemetry/sdk-metrics@1.30.0': + resolution: {integrity: sha512-5kcj6APyRMvv6dEIP5plz2qfJAD4OMipBRT11u/pa1a68rHKI2Ln+iXVkAGKgx8o7CXbD7FdPypTUY88ZQgP4Q==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + '@opentelemetry/sdk-node@0.54.2': resolution: {integrity: sha512-afn8GBpA7Gb55aU0LUxIQ+oe6QxLhsf+Te9iw12Non3ZAspzdoCcfz5+hqecwpuVpEDdnj5iSalF7VVaL2pDeg==} engines: {node: '>=14'} @@ -1489,6 +1538,12 @@ packages: peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/sdk-trace-base@1.30.0': + resolution: {integrity: sha512-RKQDaDIkV7PwizmHw+rE/FgfB2a6MBx+AEVVlAHXRG1YYxLiBpPX2KhmoB99R5vA4b72iJrjle68NDWnbrE9Dg==} + engines: {node: '>=14'} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/sdk-trace-node@1.27.0': resolution: {integrity: sha512-dWZp/dVGdUEfRBjBq2BgNuBlFqHCxyyMc8FsN0NX15X07mxSUO0SZRLyK/fdAVrde8nqFI/FEdMH4rgU9fqJfQ==} engines: {node: '>=14'} @@ -6361,6 +6416,10 @@ snapshots: dependencies: '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs@0.57.0': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api@1.9.0': {} '@opentelemetry/auto-instrumentations-node@0.54.0(@opentelemetry/api@1.9.0)(encoding@0.1.13)': @@ -6440,6 +6499,11 @@ snapshots: '@opentelemetry/api': 1.9.0 '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/core@1.30.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/exporter-logs-otlp-grpc@0.54.2(@opentelemetry/api@1.9.0)': dependencies: '@grpc/grpc-js': 1.12.2 @@ -6563,6 +6627,15 @@ snapshots: '@opentelemetry/resources': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/sdk-trace-base': 1.29.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-proto@0.57.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base': 0.57.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.57.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-zipkin@1.27.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6950,6 +7023,12 @@ snapshots: '@opentelemetry/core': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/otlp-transformer': 0.56.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-exporter-base@0.57.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-transformer': 0.57.0(@opentelemetry/api@1.9.0) + '@opentelemetry/otlp-grpc-exporter-base@0.54.2(@opentelemetry/api@1.9.0)': dependencies: '@grpc/grpc-js': 1.12.2 @@ -6988,6 +7067,17 @@ snapshots: '@opentelemetry/sdk-trace-base': 1.29.0(@opentelemetry/api@1.9.0) protobufjs: 7.4.0 + '@opentelemetry/otlp-transformer@0.57.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.57.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.57.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 1.30.0(@opentelemetry/api@1.9.0) + protobufjs: 7.4.0 + '@opentelemetry/propagation-utils@0.30.14(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7071,6 +7161,12 @@ snapshots: '@opentelemetry/core': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/resources@1.30.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/sdk-logs@0.54.2(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7085,6 +7181,13 @@ snapshots: '@opentelemetry/core': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/resources': 1.29.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs@0.57.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.57.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics@1.27.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7097,6 +7200,12 @@ snapshots: '@opentelemetry/core': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/resources': 1.29.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics@1.30.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-node@0.54.2(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7155,6 +7264,13 @@ snapshots: '@opentelemetry/resources': 1.29.0(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/sdk-trace-base@1.30.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 1.30.0(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.28.0 + '@opentelemetry/sdk-trace-node@1.27.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 diff --git a/src/opentelemetry.ts b/src/opentelemetry.ts index b1d7018..1c95bec 100644 --- a/src/opentelemetry.ts +++ b/src/opentelemetry.ts @@ -18,38 +18,46 @@ import 'dotenv/config'; import { setTimeout } from 'node:timers/promises'; -import { NodeSDK, resources, metrics } from '@opentelemetry/sdk-node'; +import { NodeSDK, resources, metrics, tracing } from '@opentelemetry/sdk-node'; import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http'; import { ATTR_DEPLOYMENT_ENVIRONMENT_NAME } from '@opentelemetry/semantic-conventions/incubating'; +import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const ENV = process.env.ENVIRONMENT; +export const isOpenTelemetryEnabled = Boolean(process.env.NODE_ENV === 'production'); +export const batchSpanProcessor = new tracing.BatchSpanProcessor(new OTLPTraceExporter({})); + export const opentelemetrySDK = new NodeSDK({ resource: new resources.Resource({ [ATTR_SERVICE_NAME]: `bee-api`, [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: ENV }), + spanProcessors: [batchSpanProcessor], metricReader: new metrics.PeriodicExportingMetricReader({ exporter: new OTLPMetricExporter() }), instrumentations: [...getNodeAutoInstrumentations()] }); -opentelemetrySDK.start(); - -let isShuttingDown = false; -const { promise, resolve } = Promise.withResolvers(); - -for (const event of ['beforeExit', 'SIGINT', 'SIGTERM']) { - process.once(event, () => { - if (!isShuttingDown) { - isShuttingDown = true; - Promise.race([opentelemetrySDK.shutdown(), setTimeout(5_000, null, { ref: false })]) - .catch((err) => { - // eslint-disable-next-line no-console - console.error(`Failed to execute shutdown hook`, err); - }) - .finally(() => resolve()); - } - return promise; - }); + +if (isOpenTelemetryEnabled) { + opentelemetrySDK.start(); + + let isShuttingDown = false; + const { promise, resolve } = Promise.withResolvers(); + + for (const event of ['beforeExit', 'SIGINT', 'SIGTERM']) { + process.once(event, () => { + if (!isShuttingDown) { + isShuttingDown = true; + Promise.race([opentelemetrySDK.shutdown(), setTimeout(5_000, null, { ref: false })]) + .catch((err) => { + // eslint-disable-next-line no-console + console.error(`Failed to execute shutdown hook`, err); + }) + .finally(() => resolve()); + } + return promise; + }); + } } diff --git a/src/runs/execution/execute.ts b/src/runs/execution/execute.ts index b38e0e0..b406bc3 100644 --- a/src/runs/execution/execute.ts +++ b/src/runs/execution/execute.ts @@ -46,6 +46,7 @@ import { LoadedRun } from '@/runs/execution/types.js'; import { UserResource } from '@/tools/entities/tool-resources/user-resource.entity.js'; import { SystemResource } from '@/tools/entities/tool-resources/system-resource.entity.js'; import { Attachment } from '@/messages/attachment.entity'; +import { batchSpanProcessor, isOpenTelemetryEnabled } from '@/opentelemetry.js'; const agentExecutionTime = new Summary({ name: 'agent_execution_time_seconds', @@ -144,6 +145,7 @@ export async function executeRun(run: LoadedRun) { ); await agentRun; + if (isOpenTelemetryEnabled) await batchSpanProcessor.forceFlush(); endAgentExecutionTimer(); run.complete(); diff --git a/src/server.ts b/src/server.ts index 6f974fb..4003f0b 100644 --- a/src/server.ts +++ b/src/server.ts @@ -14,6 +14,7 @@ * limitations under the License. */ +import './opentelemetry.js'; import '@/ui/auth-server.js'; import { JsonSchemaToTsProvider } from '@fastify/type-provider-json-schema-to-ts';