-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #623 from containers/otel-trace-workflow
update otel-collector workflow config
- Loading branch information
Showing
3 changed files
with
86 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,53 +48,55 @@ jobs: | |
run: | | ||
pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation | ||
- name: Download OpenTelemetry Collector Contrib | ||
run: | | ||
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz | ||
tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz | ||
- name: Write secrets to files | ||
run: | | ||
echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt | ||
echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt | ||
echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key | ||
- name: Configure OpenTelemetry Collector | ||
run: | | ||
echo ' | ||
extensions: | ||
basicauth/client: | ||
client_auth: | ||
username: "${{ secrets.OTEL_USERNAME }}" | ||
password: "${{ secrets.OTEL_PASSWORD }}" | ||
receivers: | ||
otlp: | ||
protocols: | ||
grpc: | ||
http: | ||
exporters: | ||
otlphttp: | ||
endpoint: https://otc.apps.platform-sts.pcbk.p1.openshiftapps.com | ||
auth: | ||
authenticator: basicauth/client | ||
tls: | ||
insecure: false | ||
ca_pem: "${{ secrets.ROSA_ROOT_CERT }}" | ||
debug: | ||
verbosity: detailed | ||
service: | ||
extensions: [basicauth/client] | ||
pipelines: | ||
traces: | ||
receivers: [otlp] | ||
exporters: [debug, otlphttp] | ||
receivers: | ||
otlp: | ||
protocols: | ||
grpc: | ||
http: | ||
exporters: | ||
otlphttp: | ||
endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}" | ||
tls: | ||
insecure: false | ||
cert_file: /tmp/server.crt | ||
key_file: /tmp/server.key | ||
ca_file: /tmp/ca.crt | ||
debug: | ||
verbosity: detailed | ||
service: | ||
pipelines: | ||
traces: | ||
receivers: [otlp] | ||
exporters: [debug, otlphttp] | ||
' > otel-collector-config.yaml | ||
- name: Run OpenTelemetry Collector | ||
run: | | ||
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz | ||
tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz | ||
chmod +x otelcol-contrib | ||
./otelcol-contrib --config otel-collector-config.yaml & | ||
./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 & | ||
- name: Install qemu dependency | ||
run: | | ||
sudo apt-get update | ||
sudo apt-get install -y qemu-user-static | ||
- name: Start build trace | ||
- name: Start job trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="build-image" | ||
export WORKFLOW_NAME="chatbot" | ||
export JOB_NAME="chatbot-build-and-push" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
|
@@ -108,54 +110,19 @@ jobs: | |
containerfiles: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}/app/Containerfile | ||
context: recipes/natural_language_processing/${{ env.IMAGE_NAME }}/app | ||
|
||
- name: End build trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="build-image" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
- name: Install Dependencies | ||
working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }} | ||
run: make install | ||
|
||
- name: Start download model trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="download-model" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
- name: Download model | ||
working-directory: ./models | ||
run: make download-model-granite | ||
|
||
- name: End download model trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="download-model" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
- name: Start functional test run trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="run-functional-tests" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
- name: Run Functional Tests | ||
shell: bash | ||
run: make functional-tests | ||
working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }} | ||
|
||
- name: End functional test run trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="run-functional-tests" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
- name: Login to Registry | ||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' | ||
uses: redhat-actions/[email protected] | ||
|
@@ -164,13 +131,6 @@ jobs: | |
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
- name: Start push image trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="push-image" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
- name: Push Image | ||
id: push_image | ||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' | ||
|
@@ -180,9 +140,10 @@ jobs: | |
tags: ${{ steps.build_image.outputs.tags }} | ||
registry: ${{ env.REGISTRY }} | ||
|
||
- name: End push image trace | ||
- name: End job trace | ||
run: | | ||
export WORKFLOW_NAME="chatbot-build-push" | ||
export STEP_NAME="push-image" | ||
export WORKFLOW_NAME="chatbot" | ||
export JOB_NAME="chatbot-build-and-push" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,9 @@ on: | |
workflow_dispatch: | ||
|
||
jobs: | ||
test: | ||
test-build: | ||
if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')" | ||
runs-on: ubuntu-22.04 | ||
runs-on: ubuntu-24.04 | ||
steps: | ||
- uses: actions/[email protected] | ||
- name: Set up Python | ||
|
@@ -33,33 +33,29 @@ jobs: | |
- name: Write secrets to files | ||
run: | | ||
echo "${{ secrets.ROSA_OTEL_TLS_CERT }}" > /tmp/tls.crt | ||
echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt | ||
echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt | ||
echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key | ||
- name: Configure OpenTelemetry Collector | ||
run: | | ||
echo ' | ||
extensions: | ||
basicauth/client: | ||
client_auth: | ||
username: "${{ secrets.OTEL_USERNAME }}" | ||
password: "${{ secrets.OTEL_PASSWORD }}" | ||
receivers: | ||
otlp: | ||
protocols: | ||
grpc: | ||
http: | ||
exporters: | ||
otlphttp: | ||
endpoint: https://otc.apps.platform-sts.pcbk.p1.openshiftapps.com:4318 | ||
auth: | ||
authenticator: basicauth/client | ||
endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}" | ||
tls: | ||
insecure: false | ||
ca_file: /tmp/tls.crt | ||
cert_file: /tmp/server.crt | ||
key_file: /tmp/server.key | ||
ca_file: /tmp/ca.crt | ||
debug: | ||
verbosity: detailed | ||
service: | ||
extensions: [basicauth/client] | ||
pipelines: | ||
traces: | ||
receivers: [otlp] | ||
|
@@ -70,10 +66,10 @@ jobs: | |
run: | | ||
./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 & | ||
- name: Start build trace | ||
- name: Start job trace | ||
run: | | ||
export WORKFLOW_NAME="test-workflow" | ||
export STEP_NAME="build" | ||
export WORKFLOW_NAME="test-trace" | ||
export JOB_NAME="test-build" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
|
@@ -82,29 +78,15 @@ jobs: | |
echo "Simulating build step..." | ||
sleep 2 | ||
- name: End build trace | ||
run: | | ||
export WORKFLOW_NAME="test-workflow" | ||
export STEP_NAME="build" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
- name: Start test trace | ||
run: | | ||
export WORKFLOW_NAME="test-workflow" | ||
export STEP_NAME="test" | ||
export TRACE_ACTION="start" | ||
python ci/trace-steps.py | ||
- name: Test | ||
run: | | ||
echo "Simulating test step..." | ||
sleep 2 | ||
- name: End test trace | ||
- name: End job trace | ||
run: | | ||
export WORKFLOW_NAME="test-workflow" | ||
export STEP_NAME="test" | ||
export WORKFLOW_NAME="test-trace" | ||
export JOB_NAME="test-build" | ||
export TRACE_ACTION="end" | ||
python ci/trace-steps.py | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,75 +1,47 @@ | ||
import os | ||
import time | ||
import logging | ||
from datetime import datetime | ||
from opentelemetry import trace | ||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter | ||
from opentelemetry.sdk.resources import Resource | ||
from opentelemetry.sdk.trace import TracerProvider | ||
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter | ||
from opentelemetry.trace import SpanContext, TraceFlags, TraceState, NonRecordingSpan | ||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter | ||
|
||
# Set up logging | ||
logging.basicConfig(level=logging.INFO) | ||
logger = logging.getLogger(__name__) | ||
service_name = os.getenv("WORKFLOW_NAME", "default_service") | ||
job_name = os.getenv("JOB_NAME", "default_job") | ||
|
||
# Set up OpenTelemetry tracing | ||
trace.set_tracer_provider( | ||
TracerProvider( | ||
resource=Resource.create({"service.name": os.getenv("WORKFLOW_NAME")}) | ||
) | ||
) | ||
resource = Resource.create({"service.name": service_name}) | ||
trace.set_tracer_provider(TracerProvider(resource=resource)) | ||
tracer = trace.get_tracer(__name__) | ||
|
||
# Set up OTLP exporter to send to OpenTelemetry Collector | ||
otlp_exporter = OTLPSpanExporter(endpoint="http://0.0.0.0:4317", insecure=True) | ||
|
||
# Set up span processor | ||
span_processor = BatchSpanProcessor(otlp_exporter) | ||
trace.get_tracer_provider().add_span_processor(span_processor) | ||
|
||
# Optionally, export to console for debugging | ||
console_exporter = ConsoleSpanExporter() | ||
trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(console_exporter)) | ||
|
||
def retry_operation(operation, retries=3, delay=5):
    """Call ``operation`` repeatedly until it succeeds or attempts run out.

    Args:
        operation: Zero-argument callable to invoke.
        retries: Maximum number of attempts to make.
        delay: Seconds to sleep after a failed attempt before the next one.

    Returns:
        The value returned by the first successful call to ``operation``.
        When ``retries`` is zero or negative no attempt is made and ``None``
        is returned.

    Raises:
        Exception: The error raised by the final attempt, re-raised as is.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            outcome = operation()
        except Exception as e:
            # Every failure is logged, including the last one before re-raise.
            logger.error(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt >= final_attempt:
                raise
            time.sleep(delay)
        else:
            return outcome
    return None
|
||
def start_trace(step_name):
    """Start a span named ``step_name`` on the module-level tracer.

    The span is returned still open; this function does not end it.
    """
    return tracer.start_span(name=step_name)
|
||
def end_trace(span):
    """End the given span by calling its ``end()`` method.

    Args:
        span: Object exposing an ``end()`` method (e.g. the span returned
            by ``start_trace``).
    """
    span.end()
console_span_processor = BatchSpanProcessor(ConsoleSpanExporter()) | ||
trace.get_tracer_provider().add_span_processor(console_span_processor) | ||
|
||
# Adding OTLP Span Exporter for actual data export | ||
otlp_exporter = OTLPSpanExporter(endpoint="localhost:4317", insecure=True) | ||
otlp_span_processor = BatchSpanProcessor(otlp_exporter) | ||
trace.get_tracer_provider().add_span_processor(otlp_span_processor) | ||
|
||
print("Tracer initialized with service name:", service_name) | ||
|
||
def set_start_time():
    """Persist the current time so a later invocation can compute elapsed time.

    Writes the current timestamp (seconds, as text) to
    ``/tmp/start_time.txt``, overwriting any previous value, and prints a
    confirmation message.
    """
    with open("/tmp/start_time.txt", "w") as handle:
        handle.write(str(datetime.now().timestamp()))
    print("Start time recorded")
|
||
def calculate_duration():
    """Report the time elapsed since the recorded start and attach it to a span.

    Reads the timestamp stored in ``/tmp/start_time.txt``, subtracts it from
    the current time, prints the result, and records it as the
    ``total_duration_s`` attribute on a span named after ``job_name``.

    Raises:
        FileNotFoundError: If ``/tmp/start_time.txt`` does not exist.
        ValueError: If the file content cannot be parsed as a float.
    """
    with open("/tmp/start_time.txt", "r") as handle:
        started_at = float(handle.read())
    elapsed = datetime.now().timestamp() - started_at
    print(f"Total Duration: {elapsed}s")
    # NOTE(review): the span is opened and closed here, so its own start/end
    # times presumably do not cover the job; the job duration is carried only
    # by the attribute below — confirm this is what the backend expects.
    with tracer.start_as_current_span(job_name) as job_span:
        job_span.set_attribute("total_duration_s", elapsed)
|
||
if __name__ == "__main__": | ||
step_name = os.getenv("STEP_NAME", "default_step") | ||
action = os.getenv("TRACE_ACTION", "start") | ||
|
||
if action == "start": | ||
span = retry_operation(lambda: start_trace(step_name)) | ||
with open(f"/tmp/trace_{step_name}.txt", "w") as f: | ||
f.write(str(span.get_span_context().trace_id)) | ||
set_start_time() | ||
elif action == "end": | ||
trace_id = os.getenv("TRACE_ID") | ||
if not trace_id: | ||
with open(f"/tmp/trace_{step_name}.txt", "r") as f: | ||
trace_id = f.read().strip() | ||
trace_id = int(trace_id, 16) # Convert trace_id back to int | ||
span_context = SpanContext( | ||
trace_id=trace_id, | ||
span_id=0, # Span ID will be generated | ||
trace_flags=TraceFlags(TraceFlags.SAMPLED), | ||
trace_state=TraceState(), | ||
is_remote=True | ||
) | ||
with tracer.start_as_current_span(name=step_name, context=trace.set_span_in_context(NonRecordingSpan(span_context))): | ||
span = tracer.start_span(name=step_name) | ||
end_trace(span) | ||
calculate_duration() |