From e995c39b096761ef6b913ea9d78517d8208f11c2 Mon Sep 17 00:00:00 2001 From: Quitterie Lucas Date: Tue, 28 Sep 2021 09:30:48 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(backends)=20add=20graylog=20service?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LDP backend is available for tests. But until now, no service was available in the project. We have implemented a graylog service for it. --- .env.dist | 5 +++ CHANGELOG.md | 1 + Makefile | 9 ++++ docker-compose.yml | 21 ++++++++- setup.cfg | 1 + src/ralph/backends/__init__.py | 1 + src/ralph/backends/logging/__init__.py | 0 src/ralph/backends/logging/base.py | 17 ++++++++ src/ralph/backends/logging/graylog.py | 59 ++++++++++++++++++++++++++ src/ralph/cli.py | 14 +++++- src/ralph/defaults.py | 11 +++++ src/ralph/utils.py | 7 ++- tests/backends/logging/__init__.py | 0 tests/backends/logging/test_base.py | 22 ++++++++++ tests/backends/logging/test_graylog.py | 18 ++++++++ tests/test_cli.py | 8 +++- 16 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 src/ralph/backends/logging/__init__.py create mode 100644 src/ralph/backends/logging/base.py create mode 100644 src/ralph/backends/logging/graylog.py create mode 100644 tests/backends/logging/__init__.py create mode 100644 tests/backends/logging/test_base.py create mode 100644 tests/backends/logging/test_graylog.py diff --git a/.env.dist b/.env.dist index bae611d1f..812d2e88c 100644 --- a/.env.dist +++ b/.env.dist @@ -4,6 +4,11 @@ RALPH_APP_DIR=/app/.ralph # Uncomment lines (by removing # characters at the beginning of target lines) # to define environment variables associated to the backend(s) you need. 
+# Graylog logging backend + +# RALPH_GRAYLOG_HOST=graylog +# RALPH_GRAYLOG_PORT=12201 + # LDP storage backend # # You need to generate an API token for your OVH's account and fill the service diff --git a/CHANGELOG.md b/CHANGELOG.md index a40029f03..069447f4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to ### Added +- Implement `graylog` backend - Implement edx problem interaction events pydantic models - Implement edx textbook interaction events pydantic models - `ws` websocket stream backend (compatible with the `fetch` command) diff --git a/Makefile b/Makefile index bcceec1a2..6626f66c5 100644 --- a/Makefile +++ b/Makefile @@ -114,6 +114,7 @@ run: \ run-all: ## start all supported local backends run-all: \ run-es \ + run-graylog \ run-swift .PHONY: run-all @@ -123,6 +124,14 @@ run-es: ## start elasticsearch backend @$(COMPOSE_RUN) dockerize -wait tcp://elasticsearch:9200 -timeout 60s .PHONY: run-es +run-graylog: ## start graylog backend + @$(COMPOSE) up -d graylog + @echo "Waiting for graylog to be up and running..." + @$(COMPOSE_RUN) dockerize -wait tcp://mongo:27017 -timeout 60s + @$(COMPOSE_RUN) dockerize -wait tcp://elasticsearch:9200 -timeout 60s + @$(COMPOSE_RUN) dockerize -wait tcp://graylog:9000 -timeout 60s +.PHONY: run-graylog + run-swift: ## start swift backend @$(COMPOSE) up -d swift @echo "Waiting for swift to be up and running..." 
diff --git a/docker-compose.yml b/docker-compose.yml index b2c894eea..394c46b68 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: '3.4' +version: "3.4" services: app: @@ -37,6 +37,25 @@ services: environment: KS_SWIFT_PUBLIC_URL: http://127.0.0.1:49177 + mongo: + image: mongo:4.2 + networks: + default: + + graylog: + image: graylog/graylog:4.0 + environment: + - GRAYLOG_HTTP_EXTERNAL_URI=http://localhost:9000/ + entrypoint: /usr/bin/tini -- wait-for-it elasticsearch:9200 -- /docker-entrypoint.sh + networks: + default: + depends_on: + - mongo + - elasticsearch + ports: + - 9000:9000 + - 12201:12201 + # -- tools dockerize: image: jwilder/dockerize diff --git a/setup.cfg b/setup.cfg index cfaa4fe12..3163a6ec5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,6 +52,7 @@ dev = ipython==7.28.0 isort==5.9.3 logging-gelf==0.0.26 + marshmallow==3.13.0 memory-profiler==0.58.0 mkdocs==1.2.3 mkdocs-click==0.4.0 diff --git a/src/ralph/backends/__init__.py b/src/ralph/backends/__init__.py index 27ab7844c..cbad2b294 100644 --- a/src/ralph/backends/__init__.py +++ b/src/ralph/backends/__init__.py @@ -7,5 +7,6 @@ class BackendTypes(Enum): """Backend types""" DATABASE = auto() + LOGGING = auto() STORAGE = auto() STREAM = auto() diff --git a/src/ralph/backends/logging/__init__.py b/src/ralph/backends/logging/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ralph/backends/logging/base.py b/src/ralph/backends/logging/base.py new file mode 100644 index 000000000..4a3e0609b --- /dev/null +++ b/src/ralph/backends/logging/base.py @@ -0,0 +1,17 @@ +"""Base logging backend for Ralph""" + +from abc import ABC, abstractmethod + + +class BaseLogging(ABC): + """Base logging backend interface""" + + name = "base" + + @abstractmethod + def get(self, chunk_size=10): + """Read chunk_size records and stream them to stdout""" + + @abstractmethod + def send(self, chunk_size=10, ignore_errors=False): + """Write chunk_size records from stdin""" 
diff --git a/src/ralph/backends/logging/graylog.py b/src/ralph/backends/logging/graylog.py new file mode 100644 index 000000000..2c35bf2e1 --- /dev/null +++ b/src/ralph/backends/logging/graylog.py @@ -0,0 +1,59 @@ +"""Graylog storage backend for Ralph""" + +from itertools import zip_longest +import logging +import sys + +from logging_gelf.formatters import GELFFormatter +from logging_gelf.handlers import GELFTCPSocketHandler + +from ...defaults import RALPH_GRAYLOG_HOST, RALPH_GRAYLOG_PORT +from ..mixins import HistoryMixin +from .base import BaseLogging + +logger = logging.getLogger(__name__) + + +class GraylogLogging(HistoryMixin, BaseLogging): + """Graylog logging backend""" + + # pylint: disable=too-many-arguments + + name = "graylog" + + def __init__( + self, + host=RALPH_GRAYLOG_HOST, + port=RALPH_GRAYLOG_PORT, + client_options=None, + ): + if client_options is None: + client_options = {} + + self.host = host + self.port = port + + self.gelf_logger = logging.getLogger("gelf") + self.gelf_logger.setLevel(logging.INFO) + + def send(self, chunk_size, ignore_errors=False): + """Send logs in graylog backend (one JSON event per line).""" + + logger.debug("Logging events (chunk size: %d)", chunk_size) + + chunks = zip_longest(*([iter(sys.stdin.readlines())] * chunk_size)) + + handler = GELFTCPSocketHandler(host=self.host, port=self.port) + handler.setFormatter(GELFFormatter()) + self.gelf_logger.addHandler(handler) + + for chunk in chunks: + for event in chunk: + self.gelf_logger.info(event) + + def get(self, chunk_size=10): + """Read chunk_size records and stream them to stdout.""" + + msg = "Graylog storage backend is write-only, cannot read from" + logger.error(msg) + raise NotImplementedError(msg) diff --git a/src/ralph/cli.py b/src/ralph/cli.py index bccaa2c50..5a4f34c93 100644 --- a/src/ralph/cli.py +++ b/src/ralph/cli.py @@ -15,6 +15,7 @@ DEFAULT_BACKEND_CHUNK_SIZE, ENVVAR_PREFIX, DatabaseBackends, + LoggingBackends, Parsers, StorageBackends, StreamBackends, 
@@ -37,10 +38,13 @@ # Lazy evaluations DATABASE_BACKENDS = (lambda: [backend.value for backend in DatabaseBackends])() +LOGGING_BACKENDS = (lambda: [backend.value for backend in LoggingBackends])() PARSERS = (lambda: [parser.value for parser in Parsers])() STORAGE_BACKENDS = (lambda: [backend.value for backend in StorageBackends])() STREAM_BACKENDS = (lambda: [backend.value for backend in StreamBackends])() -BACKENDS = (lambda: DATABASE_BACKENDS + STORAGE_BACKENDS + STREAM_BACKENDS)() +BACKENDS = ( + lambda: DATABASE_BACKENDS + LOGGING_BACKENDS + STORAGE_BACKENDS + STREAM_BACKENDS +)() class CommaSeparatedKeyValueParamType(click.ParamType): @@ -307,6 +311,8 @@ def fetch(backend, archive, chunk_size, **options): backend.get(chunk_size=chunk_size) elif backend_type == BackendTypes.STREAM: backend.stream() + elif backend_type == BackendTypes.LOGGING: + backend.get(chunk_size) elif backend_type is None: msg = "Cannot find an implemented backend type for backend %s" logger.error(msg, backend) @@ -314,7 +320,9 @@ def fetch(backend, archive, chunk_size, **options): @click.argument("archive", required=False) -@backends_options(backends=(lambda: DATABASE_BACKENDS + STORAGE_BACKENDS)()) +@backends_options( + backends=(lambda: DATABASE_BACKENDS + LOGGING_BACKENDS + STORAGE_BACKENDS)() +) @click.option( "-c", "--chunk-size", @@ -348,6 +356,8 @@ def push(backend, archive, chunk_size, force, ignore_errors, **options): if backend_type == BackendTypes.STORAGE: backend.write(archive, overwrite=force) + elif backend_type == BackendTypes.LOGGING: + backend.send(chunk_size, ignore_errors) elif backend_type == BackendTypes.DATABASE: backend.put(chunk_size=chunk_size, ignore_errors=ignore_errors) elif backend_type is None: diff --git a/src/ralph/defaults.py b/src/ralph/defaults.py index aa422dbff..3e6f77c69 100644 --- a/src/ralph/defaults.py +++ b/src/ralph/defaults.py @@ -20,6 +20,15 @@ class DatabaseBackends(Enum): ES = "ralph.backends.database.es.ESDatabase" +class 
LoggingBackends(Enum): + """Enumerate active logging backend modules. + + Adding an entry to this enum will make it available to the CLI. + """ + + GRAYLOG = "ralph.backends.logging.graylog.GraylogLogging" + + class Parsers(Enum): """Enumerate active parsers modules. @@ -127,3 +136,5 @@ def load_config(config_file_path): "RALPH_CONVERTER_EDX_XAPI_UUID_NAMESPACE", None ) EXECUTION_ENVIRONMENT = config("RALPH_EXECUTION_ENVIRONMENT", "development") +RALPH_GRAYLOG_HOST = config("RALPH_GRAYLOG_HOST", "graylog") +RALPH_GRAYLOG_PORT = config("RALPH_GRAYLOG_PORT", 12201) diff --git a/src/ralph/utils.py b/src/ralph/utils.py index 4dc3a5add..28299007e 100644 --- a/src/ralph/utils.py +++ b/src/ralph/utils.py @@ -8,6 +8,7 @@ from ralph.backends import BackendTypes from ralph.backends.database.base import BaseDatabase as BaseDatabaseBackend +from ralph.backends.logging.base import BaseLogging as BaseLoggingBackend from ralph.backends.storage.base import BaseStorage as BaseStorageBackend from ralph.backends.stream.base import BaseStream as BaseStreamBackend @@ -37,10 +38,12 @@ def import_string(dotted_path): def get_backend_type(backend_class): """Get backend type from a backend class""" - if BaseStorageBackend in backend_class.__mro__: - return BackendTypes.STORAGE if BaseDatabaseBackend in backend_class.__mro__: return BackendTypes.DATABASE + if BaseLoggingBackend in backend_class.__mro__: + return BackendTypes.LOGGING + if BaseStorageBackend in backend_class.__mro__: + return BackendTypes.STORAGE if BaseStreamBackend in backend_class.__mro__: return BackendTypes.STREAM return None diff --git a/tests/backends/logging/__init__.py b/tests/backends/logging/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/backends/logging/test_base.py b/tests/backends/logging/test_base.py new file mode 100644 index 000000000..fdad0ed54 --- /dev/null +++ b/tests/backends/logging/test_base.py @@ -0,0 +1,22 @@ +"""Tests for Ralph base logging backend""" + +from 
ralph.backends.logging.base import BaseLogging + + +def test_backends_logging_base_abstract_interface_with_implemented_abstract_method(): + """Tests the interface mechanism with properly implemented abstract methods.""" + + class GoodLogging(BaseLogging): + """Correct implementation with required abstract methods.""" + + name = "good" + + def get(self, chunk_size=10): + """Fakes the get method.""" + + def send(self, chunk_size=10, ignore_errors=False): + """Fakes the send method.""" + + GoodLogging() + + assert GoodLogging.name == "good" diff --git a/tests/backends/logging/test_graylog.py b/tests/backends/logging/test_graylog.py new file mode 100644 index 000000000..56bbab40b --- /dev/null +++ b/tests/backends/logging/test_graylog.py @@ -0,0 +1,18 @@ +"""Tests for Ralph graylog storage backend""" + +from ralph.backends.logging.graylog import GraylogLogging +from ralph.defaults import RALPH_GRAYLOG_HOST, RALPH_GRAYLOG_PORT + + +def test_backends_logging_graylog_logging_instantiation(): + """Tests the GraylogLogging backend instantiation.""" + # pylint: disable=protected-access + + storage = GraylogLogging( + host=RALPH_GRAYLOG_HOST, + port=RALPH_GRAYLOG_PORT, + ) + + assert storage.name == "graylog" + assert storage.host == "graylog" + assert storage.port == 12201 diff --git a/tests/test_cli.py b/tests/test_cli.py index 6903a8a00..9a28e9f8b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -307,12 +307,16 @@ def test_cli_fetch_command_usage(): " --ldp-application-secret TEXT\n" " --ldp-application-key TEXT\n" " --ldp-endpoint TEXT\n" + " graylog backend: \n" + " --graylog-client-options TEXT\n" + " --graylog-port INTEGER\n" + " --graylog-host TEXT\n" " es backend: \n" " --es-op-type TEXT\n" " --es-client-options KEY=VALUE,KEY=VALUE\n" " --es-index TEXT\n" " --es-hosts TEXT\n" - " -b, --backend [es|ldp|fs|swift|ws]\n" + " -b, --backend [es|graylog|ldp|fs|swift|ws]\n" " Backend [required]\n" " -c, --chunk-size INTEGER Get events by chunks of size #\n" ) in 
result.output @@ -321,7 +325,7 @@ def test_cli_fetch_command_usage(): assert result.exit_code > 0 assert ( "Error: Missing option '-b' / '--backend'. " - "Choose from:\n\tes,\n\tldp,\n\tfs,\n\tswift,\n\tws\n" + "Choose from:\n\tes,\n\tgraylog,\n\tldp,\n\tfs,\n\tswift,\n\tws\n" ) in result.output