diff --git a/.gitignore b/.gitignore
index 6424d91..6d470c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,5 +21,6 @@
 go.work
 
 # runtime files
-splunk_exporter.yml
-/splunk_exporter
\ No newline at end of file
+/splunk_exporter.yml
+/splunk_exporter
+deploy/default.yml
\ No newline at end of file
diff --git a/README.md b/README.md
index 83726fa..5b7b7e3 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ You will need a configuration file, follow [`splunk_exporter_example.yml`](./spl
 You need docker compose installed, a bash helper is provided to start the exporter and the whole test bench as a [docker compose environment](./deploy/README.md).
 
 ```shell
-cd /deploy
+cd deploy/
 bash run.sh
 ```
 
@@ -27,27 +27,44 @@ To stop it:
 docker compose down
 ```
 
-## 🙋 Contribute
+## 👷 Contribute
 
 After doing some changes, possible to re-deploy splunk_exporter with the following command
 
 ```shell
 docker compose up -d --build splunk_exporter
 ```
 
+## 🛠️ Configuration
+
+The Splunk exporter needs access to the Splunk management APIs.
+See an example configuration file in [`splunk_exporter_example.yml`](./splunk_exporter_example.yml).
+
 ## 📏 metrics
 
 All metrics are **Gauge**.
 
 ### from API
-| Prefix                                                 | Description                                        |
-| ------------------------------------------------------ | -------------------------------------------------- |
-| `splunk_exporter_index_`                               | Numerical data coming from data/indexes endpoint.  |
-| `splunk_exporter_indexer_throughput_bytes_per_seconds` | average data throughput in indexer                 |
-| `splunk_exporter_metric_`                              | Export from metric indexes                         |
-| `splunk_exporter_health_splunkd`                       | Health status from local splunkd                   |
-| `splunk_exporter_health_deployment`                    | Health status from deployment                      |
+| Prefix                                                 | Labels                        | Description                                        |
+| ------------------------------------------------------ | ----------------------------- | -------------------------------------------------- |
+| `splunk_exporter_index_`                               | `index_name`                  | Numerical data coming from data/indexes endpoint.  |
+| `splunk_exporter_indexer_throughput_bytes_per_seconds` | _None_                        | Average data throughput in indexer                 |
+| `splunk_exporter_metric_`                              | Dimensions returned by Splunk | Export from metric indexes                         |
+| `splunk_exporter_health_splunkd`                       | `name`                        | Health status from local splunkd                   |
+| `splunk_exporter_health_deployment`                    | `instance_id`, `name`         | Health status from deployment                      |
+
+## 🧑‍🔬 Testing
+
+```shell
+go test -v ./...
+```
 
-## ⛔ Limitations
+## ✨ Roadmap
 
-Currently, only one splunk instance is supported
\ No newline at end of file
+| Item                  | Status            |
+| --------------------- | ----------------- |
+| Metrics indexes       | ✅ Done            |
+| Indexes metrics       | 🕰️ Ongoing         |
+| Savedsearches metrics | 🔜 Next            |
+| System metrics        | ❓ Not planned yet |
+| Ingestion pipeline    | ❓ Not planned yet |
diff --git a/config/config.go b/config/config.go
index d8bbd1c..4d6c37e 100644
--- a/config/config.go
+++ b/config/config.go
@@ -19,6 +19,8 @@ type Metric struct {
 type Config struct {
     URL      string   `yaml:"url"`
     Token    string   `yaml:"token"`
+    Username string   `yaml:"username"`
+    Password string   `yaml:"password"`
     Insecure bool     `yaml:"insecure"` // defaults to false
     Metrics  []Metric `yaml:"metrics"`
 }
@@ -61,6 +63,7 @@ func (sc *SafeConfig) ReloadConfig(confFile string, logger log.Logger) (err erro
         return fmt.Errorf("error reading config file: %s", err)
     }
     defer yamlReader.Close()
+
     decoder := yaml.NewDecoder(yamlReader)
     decoder.KnownFields(true)
 
diff --git a/config/config_test.go b/config/config_test.go
index 55debb8..c620a0f 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -6,10 +6,43 @@ import (
     "github.com/prometheus/client_golang/prometheus"
 )
 
-func TestLoadConfig(t *testing.T) {
+// TestLoadConfigToken
+// Given
+//
+// A valid config file using a token
+//
+// When
+//
+// reloading the config
+//
+// Then
+//
+// Config reload happens without error
+func TestLoadConfigToken(t *testing.T) {
     sc := NewSafeConfig(prometheus.NewRegistry())
 
-    err := sc.ReloadConfig("testdata/splunk_exporter-good.yml", nil)
+    err := sc.ReloadConfig("testdata/splunk_exporter-token-good.yml", nil)
+    if err != nil {
+        t.Errorf("Error loading config %v: %v", "splunk_exporter-token-good.yml", err)
+    }
+}
+
+// TestLoadConfigUser
+// Given
+//
+// A valid config file using a username and password
+//
+// When
+//
+// reloading the config
+//
+// Then
+//
+// Config reload happens without error
+func TestLoadConfigUser(t *testing.T) {
+    sc := NewSafeConfig(prometheus.NewRegistry())
+
+    err := sc.ReloadConfig("testdata/splunk_exporter-user-good.yml", nil)
     if err != nil {
         t.Errorf("Error loading config %v: %v", "splunk_exporter-good.yml", err)
     }
diff --git a/config/testdata/splunk_exporter-good.yml b/config/testdata/splunk_exporter-token-good.yml
similarity index 100%
rename from config/testdata/splunk_exporter-good.yml
rename to config/testdata/splunk_exporter-token-good.yml
diff --git a/deploy/splunk_exporter.yml.src b/config/testdata/splunk_exporter-user-good.yml
similarity index 87%
rename from deploy/splunk_exporter.yml.src
rename to config/testdata/splunk_exporter-user-good.yml
index 21a1507..4ac6d36 100644
--- a/deploy/splunk_exporter.yml.src
+++ b/config/testdata/splunk_exporter-user-good.yml
@@ -1,5 +1,6 @@
 url: https://splunk:8089
-token: '${SPLUNK_TOKEN}'
+username: toto
+password: tutu
 insecure: true
 metrics:
   - index: _metrics
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml
index a9d1e0d..3544405 100644
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -55,18 +55,55 @@ services:
     command: /splunk_exporter --config.file /splunk_exporter.yml --log.level=debug
     # add /etc/ssl/certs/ca-certificates.crt if needed
 
+  # splunk:
+  #   image: splunk/splunk:9.2
+  #   restart: unless-stopped
+  #   container_name: splunk
+  #   environment:
+  #     - SPLUNK_START_ARGS=--accept-license
+  #     - SPLUNK_PASSWORD=splunkadmin
+  #   expose:
+  #     - 8000
+  #     - 8089
+  #   ports:
+  #     - 8000:8000
+  #     - 8089:8089
+  #   networks:
+  #     - monitoring
+
   splunk:
-    image: splunk/splunk:9.2
-    restart: unless-stopped
     container_name: splunk
+    networks:
+      monitoring:
+        aliases:
+          - splunk
+    image: ${SPLUNK_IMAGE:-splunk/splunk:latest}
+    hostname: splunk
     environment:
       - SPLUNK_START_ARGS=--accept-license
+      - SPLUNK_STANDALONE_URL=splunk
+      - DEBUG=true
       - SPLUNK_PASSWORD=splunkadmin
-    expose:
+    ports:
       - 8000
       - 8089
-    ports:
-      - 8000:8000
-      - 8089:8089
+
+  dmc:
+    container_name: dmc
     networks:
-      - monitoring
\ No newline at end of file
+      monitoring:
+        aliases:
+          - dmc
+    image: ${SPLUNK_IMAGE:-splunk/splunk:latest}
+    command: start
+    hostname: dmc
+    environment:
+      - SPLUNK_START_ARGS=--accept-license
+      - SPLUNK_STANDALONE_URL=splunk
+      - SPLUNK_ROLE=splunk_monitor
+      - SPLUNK_LICENSE_URI
+      - SPLUNK_PASSWORD=splunkadmin
+      - DEBUG=true
+    ports:
+      - 8000
+      - 8089
\ No newline at end of file
diff --git a/deploy/run.sh b/deploy/run.sh
index 28f4734..e218ea9 100755
--- a/deploy/run.sh
+++ b/deploy/run.sh
@@ -5,19 +5,12 @@ set -e
 # print commands
 set -v
 
-# initiate conf file
-touch ./splunk_exporter.yml
-
 # Start the stack
-docker compose up -d prometheus grafana splunk
-
-# Wait for splunk to be initialized
-until docker logs -n1 splunk 2>/dev/null | grep -q -m 1 '^Ansible playbook complete'; do sleep 0.2; done
-
-# Generate api key
-export SPLUNK_TOKEN=$(curl -k -u admin:splunkadmin -X POST https://localhost:8089/services/authorization/tokens?output_mode=json --data name=admin --data audience=splunk_exporter | jq -r '.entry[0].content.token')
-cat splunk_exporter.yml.src | envsubst > splunk_exporter.yml
+export SPLUNK_IMAGE="splunk/splunk:9.3"
+docker run --rm -it ${SPLUNK_IMAGE:-splunk/splunk:latest} create-defaults > default.yml
+docker compose up -d --remove-orphans
 
-# start splunk_exporter
-docker compose up -d
+# Please wait for Splunk to be initialized, check this with the command:
+# docker compose logs dmc -f
+# If you need to reload config, you may use the following command:
 # curl -X POST http://localhost:9115/-/reload
diff --git a/deploy/splunk_exporter.yml b/deploy/splunk_exporter.yml
new file mode 100644
index 0000000..b3d45ad
--- /dev/null
+++ b/deploy/splunk_exporter.yml
@@ -0,0 +1,9 @@
+url: https://dmc:8089
+username: admin
+password: splunkadmin
+insecure: true
+metrics:
+  - index: _metrics
+    name: spl.intr.disk_objects.Indexes.data.total_event_count
+  - index: _metrics
+    name: spl.intr.disk_objects.Indexes.data.total_bucket_count
\ No newline at end of file
diff --git a/exporter/exporter.go b/exporter/exporter.go
index 1cd59a5..ab7ea8c 100644
--- a/exporter/exporter.go
+++ b/exporter/exporter.go
@@ -42,46 +42,81 @@ type Exporter struct {
 }
 
 func (e *Exporter) UpdateConf(conf *config.Config) {
-    // FIXME need to re-validate params
-    e.splunk.Client.TLSInsecureSkipVerify = conf.Insecure
-    e.splunk.Client.URL = conf.URL
-    e.splunk.Client.Authenticator = authenticators.Token{
-        Token: conf.Token,
+
+    opts := SplunkOpts{
+        URI:      conf.URL,
+        Token:    conf.Token,
+        Username: conf.Username,
+        Password: conf.Password,
+        Insecure: conf.Insecure,
+    }
+
+    client, err := getSplunkClient(opts, e.logger)
+
+    if err != nil {
+        level.Error(e.logger).Log("msg", "Could not get Splunk client", "err", err)
     }
+    e.splunk.Client = client
 }
 
 type SplunkOpts struct {
     URI      string
     Token    string
+    Username string
+    Password string
     Insecure bool
 }
 
-// New creates a new exporter for Splunk metrics
-func New(opts SplunkOpts, logger log.Logger, metricsConf []config.Metric) (*Exporter, error) {
+// getSplunkClient generates a Splunk client from parameters
+// this function validates parameters and returns an error if they are not valid.
+func getSplunkClient(opts SplunkOpts, logger log.Logger) (*splunkclient.Client, error) {
 
-    uri := opts.URI
-    if !strings.Contains(uri, "://") {
-        uri = "https://" + uri
+    if !strings.Contains(opts.URI, "://") {
+        opts.URI = "https://" + opts.URI
     }
-    u, err := url.Parse(uri)
+    u, err := url.Parse(opts.URI)
     if err != nil {
         return nil, fmt.Errorf("invalid splunk URL: %s", err)
     }
     if u.Host == "" || (u.Scheme != "http" && u.Scheme != "https") {
-        return nil, fmt.Errorf("invalid splunk URL: %s", uri)
+        return nil, fmt.Errorf("invalid splunk URL: %s", opts.URI)
     }
-    authenticator := authenticators.Token{
-        Token: opts.Token,
+    var authenticator splunkclient.Authenticator
+    if len(opts.Token) > 0 {
+        level.Info(logger).Log("msg", "Token is defined, we will use it for authentication.")
+        authenticator = authenticators.Token{
+            Token: opts.Token,
+        }
+    } else {
+        level.Info(logger).Log("msg", "Token is not defined, we will use password authentication.", "username", opts.Username)
+        if len(opts.Password) == 0 {
+            level.Warn(logger).Log("msg", "Password seems to be undefined.")
+        }
+        authenticator = &authenticators.Password{
+            Username: opts.Username,
+            Password: opts.Password,
+        }
     }
 
     client := splunkclient.Client{
         URL:                   opts.URI,
         Authenticator:         authenticator,
         TLSInsecureSkipVerify: opts.Insecure,
     }
+    return &client, nil
+}
+
+// New creates a new exporter for Splunk metrics
+func New(opts SplunkOpts, logger log.Logger, metricsConf []config.Metric) (*Exporter, error) {
+
+    client, err := getSplunkClient(opts, logger)
+
+    if err != nil {
+        level.Error(logger).Log("msg", "Could not get Splunk client", "err", err)
+    }
 
     spk := splunklib.Splunk{
-        Client: &client,
+        Client: client,
         Logger: logger,
     }
 
@@ -126,13 +161,13 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 // collectConfiguredMetrics gets metric measures from splunk indexes as specified by configuration
 func (e *Exporter) collectConfiguredMetrics(ch chan<- prometheus.Metric) bool {
 
-    return e.indexedMetrics.ProcessMeasures(ch)
+    return e.indexedMetrics.CollectMeasures(ch)
 }
 
 // collectHealthMetrics grabs metrics from Splunk Health endpoints
 func (e *Exporter) collectHealthMetrics(ch chan<- prometheus.Metric) bool {
 
-    return e.healthMetrics.ProcessMeasures(ch)
+    return e.healthMetrics.CollectMeasures(ch)
 }
 
 func (e *Exporter) collectIndexerMetrics(ch chan<- prometheus.Metric) bool {
diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go
new file mode 100644
index 0000000..c94f610
--- /dev/null
+++ b/exporter/exporter_test.go
@@ -0,0 +1,4 @@
+package exporter
+
+// We should properly test the metrics being collected (from testdata/deploymenthealth.json for example).
+// We're waiting for https://github.com/prometheus/client_golang/issues/1639 to be resolved for this.
diff --git a/exporter/health_manager.go b/exporter/health_manager.go
index 3a46bdb..3bc006f 100644
--- a/exporter/health_manager.go
+++ b/exporter/health_manager.go
@@ -2,6 +2,7 @@ package exporter
 
 import (
     "fmt"
+    "strings"
 
     splunklib "github.com/K-Yo/splunk_exporter/splunk"
     "github.com/go-kit/log"
@@ -33,7 +34,7 @@ func newHealthManager(namespace string, spk *splunklib.Splunk, logger log.Logger
         deploymentDescriptor: prometheus.NewDesc(
             prometheus.BuildFQName(namespace, "health", "deployment"),
             "Splunk exported metric from deployment health API",
-            []string{"name"}, nil,
+            []string{"name", "instance_id"}, nil,
         ),
     }
 
@@ -41,7 +42,7 @@ func newHealthManager(namespace string, spk *splunklib.Splunk, logger log.Logger
     return &hm
 }
 
-func (hm *HealthManager) ProcessMeasures(ch chan<- prometheus.Metric) bool {
+func (hm *HealthManager) CollectMeasures(ch chan<- prometheus.Metric) bool {
     // collect splunkd health metrics
     level.Info(hm.logger).Log("msg", "Collecting Splunkd Health measures")
 
@@ -51,7 +52,7 @@ func (hm *HealthManager) ProcessMeasures(ch chan<- prometheus.Metric) bool {
         return false
     }
 
-    ret := hm.getMetricsSplunkd(ch, "", &splunkdHealth.Content)
+    ret := hm.collectMetricsSplunkd(ch, "", &splunkdHealth.Content)
 
     level.Info(hm.logger).Log("msg", "Done collecting Splunkd Health measures")
 
@@ -65,15 +66,15 @@ func (hm *HealthManager) ProcessMeasures(ch chan<- prometheus.Metric) bool {
         return false
     }
 
-    ret = ret && hm.getMetricsDeployment(ch, "", deploymentHealth.Content.Features)
+    ret = ret && hm.collectMetricsDeployment(ch, "", deploymentHealth.Content.Features)
 
     level.Info(hm.logger).Log("msg", "Done collecting Deployment Health measures")
     return ret
 }
 
-// getMetricsSplunkd recursively get all metric measures from a health endpoint result and sends them in the channel
+// collectMetricsSplunkd recursively gets all metric measures from a health endpoint result and sends them in the channel
 // disabled features are not measured
-func (hm *HealthManager) getMetricsSplunkd(ch chan<- prometheus.Metric, path string, fh *splunklib.FeatureHealth) bool {
+func (hm *HealthManager) collectMetricsSplunkd(ch chan<- prometheus.Metric, path string, fh *splunklib.FeatureHealth) bool {
     ret := true
     if !fh.Disabled {
         healthValue, err := hm.healthToFloat(fh.Health)
@@ -91,16 +92,16 @@ func (hm *HealthManager) getMetricsSplunkd(ch chan<- prometheus.Metric, path str
     }
 
     for name, child := range fh.Features {
-        ret = ret && hm.getMetricsSplunkd(ch, fmt.Sprintf("%s/%s", path, name), &child)
+        ret = ret && hm.collectMetricsSplunkd(ch, fmt.Sprintf("%s/%s", path, name), &child)
     }
 
     return ret
 
 }
 
-// getMetricsDeployment recursively get all metric measures from a health endpoint result and sends them in the channel
+// collectMetricsDeployment recursively gets all metric measures from a health endpoint result and sends them in the channel
 // disabled features are not measured
-func (hm *HealthManager) getMetricsDeployment(ch chan<- prometheus.Metric, path string, data map[string]interface{}) bool {
+func (hm *HealthManager) collectMetricsDeployment(ch chan<- prometheus.Metric, path string, data map[string]interface{}) bool {
     level.Debug(hm.logger).Log("msg", "Getting Deployment metrics", "path", path)
     ret := true
     var disabled bool = false
@@ -132,15 +133,24 @@ func (hm *HealthManager) getMetricsDeployment(ch chan<- prometheus.Metric, path
         case map[string]interface{}:
             newPath := fmt.Sprintf("%s/%s", path, key)
             // recursively get lower level metrics
-            ret = ret && hm.getMetricsDeployment(ch, newPath, v)
+            ret = ret && hm.collectMetricsDeployment(ch, newPath, v)
         default:
             level.Error(hm.logger).Log("msg", "unknown type for key", "key", key, "path", path)
         }
     }
     level.Debug(hm.logger).Log("num_red", num_red, "num_yellow", num_yellow)
+    skipMetric := false
+    level.Debug(hm.logger).Log("path", path, "lenpath", len(path))
+    skipMetric = skipMetric || disabled     // ignore disabled metrics
+    skipMetric = skipMetric || health == "" // ignore when we cannot parse health
+
+    // ignore when it's a metric ending with "/instances"
+    if len(path) > 10 {
+        skipMetric = skipMetric || (path[len(path)-10:] == "/instances")
+    }
 
     // Add current metric
-    if !disabled && health != "" {
+    if !skipMetric {
         healthValue, err := hm.healthToFloat(health)
         if err != nil {
             level.Error(hm.logger).Log("msg", "Cannot get metrics because of unknown health value", "path", path, "err", err)
@@ -151,8 +161,18 @@ func (hm *HealthManager) getMetricsDeployment(ch chan<- prometheus.Metric, path
             displayPath = "/"
         }
 
+        // process when metrics are in the form of /splunkd/resource_usage/iowait/sum_top3_cpu_percs__max_last_3m/instances/8F8096AF-A456-4974-92FB-966103FA9752
+        instanceId := ""
+        if strings.Contains(path, "/instances/") {
+            parts := strings.Split(path, "/")
+            if parts[len(parts)-2] == "instances" {
+                instanceId = parts[len(parts)-1]
+            }
+            displayPath = strings.Join(parts[:len(parts)-2], "/")
+        }
+
         ch <- prometheus.MustNewConstMetric(
-            hm.deploymentDescriptor, prometheus.GaugeValue, healthValue, displayPath,
+            hm.deploymentDescriptor, prometheus.GaugeValue, healthValue, displayPath, instanceId,
         )
     }
 
diff --git a/exporter/health_manager_test.go b/exporter/health_manager_test.go
index 2bdd327..e14328a 100644
--- a/exporter/health_manager_test.go
+++ b/exporter/health_manager_test.go
@@ -2,6 +2,7 @@ package exporter
 
 import (
     "encoding/json"
+    "fmt"
     "os"
     "testing"
 
@@ -14,10 +15,12 @@ import (
 
 func TestDeployment(t *testing.T) {
     _, w, _ := os.Pipe()
+    defer w.Close()
+
     logger := log.NewJSONLogger(w)
     hm := HealthManager{
         logger:               logger,
-        deploymentDescriptor: prometheus.NewDesc("metric", "", []string{"name"}, nil),
+        deploymentDescriptor: prometheus.NewDesc("metric", "", []string{"name", "instance_id"}, nil),
     }
 
     var deploymentHealth splunklib.HealthDeploymentDetails
@@ -27,12 +30,25 @@ func TestDeployment(t *testing.T) {
 
     json.Unmarshal(fileContent, &deploymentHealth)
 
-    ret := hm.getMetricsDeployment(
-        make(chan<- prometheus.Metric),
-        "",
-        deploymentHealth.Content.Features,
-    )
-
-    assert.True(t, ret)
+    ch := make(chan prometheus.Metric)
+
+    // launch collector
+    go func() {
+        ret := hm.collectMetricsDeployment(
+            ch,
+            "",
+            deploymentHealth.Content.Features,
+        )
+        close(ch)
+
+        assert.True(t, ret)
+    }()
+
+    // empty chan
+    go func() {
+        for x := <-ch; x != nil; x = <-ch {
+            fmt.Fprintln(os.Stdout, x)
+        }
+    }()
 }
 
diff --git a/exporter/metrics_manager.go b/exporter/metrics_manager.go
index 42fb2b8..19dc349 100644
--- a/exporter/metrics_manager.go
+++ b/exporter/metrics_manager.go
@@ -43,9 +43,9 @@ func (mm *MetricsManager) Add(metric config.Metric) {
 
 }
 
-// ProcessMeasures will get all measures and send generated metrics in channel
+// CollectMeasures will get all measures and send generated metrics in channel
 // returns true if everything went well
-func (mm *MetricsManager) ProcessMeasures(ch chan<- prometheus.Metric) bool {
+func (mm *MetricsManager) CollectMeasures(ch chan<- prometheus.Metric) bool {
     level.Info(mm.logger).Log("msg", "Getting custom measures")
 
     processMetricCallback := func(measure splunklib.MetricMeasure, descriptor *prometheus.Desc) error {
@@ -81,7 +81,7 @@ func (mm *MetricsManager) ProcessOneMeasure(key string, callback func(splunklib.
         level.Error(mm.logger).Log("msg", "Unknown metric name, this should not happen", "name", key)
     }
     if metric.Desc == nil {
-        level.Debug(mm.logger).Log("msg", "First time seing this metric, will create desc for it.", "name", key)
+        level.Debug(mm.logger).Log("msg", "First time seeing this metric, will create desc for it.", "name", key)
         name := mm.normalizeName(metric.Name)
 
         labelsMap, labelsPromNames := mm.getLabels(metric)
diff --git a/main.go b/main.go
index a901f54..89c2a11 100644
--- a/main.go
+++ b/main.go
@@ -71,6 +71,8 @@ func run() int {
     opts := exporter.SplunkOpts{
         URI:      sc.C.URL,
         Token:    sc.C.Token,
+        Username: sc.C.Username,
+        Password: sc.C.Password,
         Insecure: sc.C.Insecure,
     }
     exp, err := exporter.New(opts, logger, sc.C.Metrics)
diff --git a/splunk_exporter_example.yml b/splunk_exporter_example.yml
index fb1f4f8..8553025 100644
--- a/splunk_exporter_example.yml
+++ b/splunk_exporter_example.yml
@@ -1,8 +1,12 @@
-# Splunk API endpoint
+
+# Monitoring console (or Search Head)
 url: https://localhost:8089
 
 # Splunk API key
 token: ''
+# OR
+username: 'changeme'
+password: 'changeme'
 
 # should we skip TLS validation?
 insecure: false
@@ -16,4 +20,4 @@ metrics:
   - index: _metrics
     name: spl.mlog.searchscheduler.max_lag
   - index: _metrics
-    name: spl.mlog.searchscheduler.skipped
\ No newline at end of file
+    name: spl.mlog.searchscheduler.skipped
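
As a rough smoke test for the patch above (a sketch, not part of the diff itself): assuming the exporter started from deploy/ listens on localhost:9115 — the port used by the reload hint in deploy/run.sh — and serves the conventional Prometheus /metrics path, the reworked deployment-health metric and its new instance_id label can be checked with:

    # each splunk_exporter_health_deployment series should now carry "name" and "instance_id" labels
    curl -s http://localhost:9115/metrics | grep splunk_exporter_health_deployment

    # reload the exporter after changing deploy/splunk_exporter.yml (command taken from deploy/run.sh)
    curl -X POST http://localhost:9115/-/reload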