Skip to content

Commit

Permalink
Add basic Prometheus metrics for binlog-collector
Browse files Browse the repository at this point in the history
The following metrics have been added:
- pxc_binlog_collector_success_total
- pxc_binlog_collector_failure_total
- pxc_binlog_collector_last_processing_timestamp
- pxc_binlog_collector_last_upload_timestamp
- pxc_binlog_collector_gap_detected_total

Additionally, a simple /health endpoint has been added.
  • Loading branch information
s10 committed Dec 13, 2024
1 parent 698dd17 commit 5742cc6
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
50 changes: 50 additions & 0 deletions cmd/pitr/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,53 @@ import (

"github.com/go-sql-driver/mysql"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"

"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/pxc"
"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup/storage"
)

// Prometheus metrics published by the binlog collector.
var (
	// pxcBinlogCollectorBackupSuccess is incremented when a collector run
	// finishes without an error.
	pxcBinlogCollectorBackupSuccess = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "pxc_binlog_collector_success_total",
		Help: "Total number of successful binlog backups",
	})

	// pxcBinlogCollectorBackupFailure is incremented when a collector run
	// returns an error.
	pxcBinlogCollectorBackupFailure = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "pxc_binlog_collector_failure_total",
		Help: "Total number of failed binlog backups",
	})

	// pxcBinlogCollectorLastProcessingTime holds the time at which binlog
	// collection last completed, including passes with nothing to upload.
	pxcBinlogCollectorLastProcessingTime = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "pxc_binlog_collector_last_processing_timestamp",
		Help: "Timestamp of the last successful binlog processing",
	})

	// pxcBinlogCollectorLastUploadTime holds the time at which binlogs were
	// last uploaded successfully.
	pxcBinlogCollectorLastUploadTime = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "pxc_binlog_collector_last_upload_timestamp",
		Help: "Timestamp of the last successful binlog upload",
	})

	// pxcBinlogCollectorGapDetected is incremented each time a gap in the
	// binary logs is detected.
	pxcBinlogCollectorGapDetected = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "pxc_binlog_collector_gap_detected_total",
		Help: "Total number of detected gaps in binlog uploads",
	})
)

// init registers all binlog-collector metrics with the default Prometheus
// registerer. MustRegister panics if any registration fails, so a bad metric
// definition surfaces at process start.
func init() {
	prometheus.MustRegister(
		pxcBinlogCollectorBackupSuccess,
		pxcBinlogCollectorBackupFailure,
		pxcBinlogCollectorLastProcessingTime,
		pxcBinlogCollectorLastUploadTime,
		pxcBinlogCollectorGapDetected,
	)
}

type Collector struct {
db *pxc.PXC
storage storage.Storage
Expand Down Expand Up @@ -103,6 +145,7 @@ func New(ctx context.Context, c Config) (*Collector, error) {
func (c *Collector) Run(ctx context.Context) error {
err := c.newDB(ctx)
if err != nil {
pxcBinlogCollectorBackupFailure.Inc()
return errors.Wrap(err, "new db connection")
}
defer c.close()
Expand All @@ -113,9 +156,11 @@ func (c *Collector) Run(ctx context.Context) error {

err = c.CollectBinLogs(ctx)
if err != nil {
pxcBinlogCollectorBackupFailure.Inc()
return errors.Wrap(err, "collect binlog files")
}

pxcBinlogCollectorBackupSuccess.Inc()
return nil
}

Expand Down Expand Up @@ -369,6 +414,7 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
if lastUploadedBinlogName == "" {
log.Println("ERROR: Couldn't find the binlog that contains GTID set:", c.lastUploadedSet.Raw())
log.Println("ERROR: Gap detected in the binary logs. Binary logs will be uploaded anyway, but full backup needed for consistent recovery.")
pxcBinlogCollectorGapDetected.Inc()
if err := createGapFile(c.lastUploadedSet); err != nil {
return errors.Wrap(err, "create gap file")
}
Expand All @@ -382,6 +428,7 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {

if len(list) == 0 {
log.Println("No binlogs to upload")
pxcBinlogCollectorLastProcessingTime.SetToCurrentTime()
return nil
}

Expand Down Expand Up @@ -411,6 +458,9 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
return errors.Wrap(err, "update timeline file")
}
}

pxcBinlogCollectorLastUploadTime.SetToCurrentTime()
pxcBinlogCollectorLastProcessingTime.SetToCurrentTime()
return nil
}

Expand Down
14 changes: 14 additions & 0 deletions cmd/pitr/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"syscall"
Expand All @@ -14,6 +15,7 @@ import (
"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/recoverer"

"github.com/caarlos0/env"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
Expand All @@ -23,6 +25,13 @@ func main() {
}
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, os.Interrupt)
defer stop()

go func() {
http.Handle("/metrics", promhttp.Handler())
http.HandleFunc("/health", healthHandler)
log.Fatal(http.ListenAndServe(":8080", nil))
}()

switch command {
case "collect":
runCollector(ctx)
Expand All @@ -34,6 +43,11 @@ func main() {
}
}

// healthHandler serves the /health endpoint. It always replies with HTTP 200
// and the plain body "ok"; the request itself is not inspected.
func healthHandler(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)
	// The status line has already been sent, so a failed body write cannot be
	// reported to the client. Log it instead of silently dropping the error.
	if _, err := w.Write([]byte("ok")); err != nil {
		log.Println("ERROR: write health response:", err)
	}
}

func runCollector(ctx context.Context) {
config, err := getCollectorConfig()
if err != nil {
Expand Down
10 changes: 10 additions & 0 deletions pkg/pxc/app/deployment/binlog-collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ func GetBinlogCollectorDeployment(cr *api.PerconaXtraDBCluster, initImage string
},
},
}

if cr.CompareVersionWith("1.16.0") >= 0 {
container.Ports = []corev1.ContainerPort{
{
ContainerPort: 8080,
Name: "metrics",
},
}
}

replicas := int32(1)

var initContainers []corev1.Container
Expand Down

0 comments on commit 5742cc6

Please sign in to comment.