diff --git a/README.md b/README.md
index 7ca9ac1..259c15b 100644
--- a/README.md
+++ b/README.md
@@ -281,7 +281,7 @@
 kubeconfig_location: "{{env(KUBECONFIG)}}"
 ```
 
-## himl config merger
+## himl-config-merger
 
 The `himl-config-merger` script, contains logic of merging a hierarchical config directory and creating the end result YAML files.
 
@@ -343,6 +343,56 @@ Leveraging HIML, the config-merger script loads the configs tree structure and d
 
 Under each level, there is a mandatory "level key" that is used by config-merger for computing the end result. This key should be present in one of the files under each level. (eg. env.yaml under env).
 
+### Output filtering
+
+Some configs that are defined in the higher levels of the directory tree might not be needed in the final (leaf) result. For this reason, the config-merger script can apply a set of filter rules, specified via the `--filter-rules-key` parameter. The referenced key must be present in the config and contains the rules for removing root-level keys from the output. A rule is applied when every entry in its `selector` matches the corresponding value in the output; matching rules keep the keys listed under `values` and any keys matching the `regex` pattern, while all other keys (except the level keys) are removed.
+
+
+```yaml
+# intermediate config after hierarchical merge
+env: dev
+cluster: cluster1
+region: us-east-1
+key1: persisted
+key2: dropped
+keep_1: persisted
+tags:
+  cost_center: 123
+_filters:
+- selector:
+    env: "dev"
+  keys:
+    values:
+    - key1
+    regex: "keep_.*"
+- selector:
+    cluster:
+      regex: "cluster1"
+  keys:
+    values:
+    - tags
+```
+
+Build the output with filtering:
+```sh
+himl-config-merger examples/filters --output-dir merged_output --levels env region cluster --leaf-directories cluster --filter-rules-key _filters
+```
+
+```yaml
+# output after filtering
+env: dev
+cluster: cluster1
+region: us-east-1
+key1: persisted
+keep_1: persisted
+tags:
+  cost_center: 123
+```
+#### Filtering limitations
+
+Rule selectors and key filtering work only at the root level of the config; it is not possible to filter nested keys.
+
+
 ### Extra merger features
 
 Apart from the standard features found in the `PyYaml` library, the `himl-config-merger` component also implements a custom YAML tag called `!include`.
diff --git a/examples/filters/default.yaml b/examples/filters/default.yaml
new file mode 100644
index 0000000..68755f3
--- /dev/null
+++ b/examples/filters/default.yaml
@@ -0,0 +1,66 @@
+env: default
+region: default
+cluster: default
+
+cluster_info:
+  name: default # this will be overridden by the inner cluster.yaml file
+
+  # Interpolation example
+  description: "This is cluster: {{cluster}}. It is using {{cluster_info.node_type}} instance type."
+  node_type: c3.2xlarge # default value, which can be overridden by each cluster
+cluster_metrics:
+  - id: 1
+    metric: cpu
+    value: 90
+  - id: 2
+    metric: memory
+    value: 90
+  - id: 3
+    metric: disk
+    value: 90
+metrics:
+  - cpu
+  - memory
+  - disk
+myList:
+  - id1
+  - id4
+# Fetching the secret value at runtime, from a secrets store (in this case AWS SSM).
+# passphrase: "{{ssm.path(/key/coming/from/aws/secrets/store/manager).aws_profile(myprofile)}}"
+
+# Fetching the value at runtime from S3
+# my_secret: "{{s3.bucket(my-bucket).path(path/to/file.txt).base64encode(true).aws_profile(myprofile)}}"
+
+
+_filters:
+  # Keep the _filters key in all outputs. A rule with no selector matches all outputs by default.
+  # - keys:
+  #     values:
+  #       - "_filters"
+
+  - selector:
+      cluster: "cluster.*"
+    keys:
+      values:
+        - persisted_key
+        # - persisted_key_referenced
+        # - persisted_key_to_drop
+        # - persisted_key_to_drop2
+        # - cluster_persisted_object
+        # - cluster_persisted_list
+
+  - selector:
+      cluster: cluster1
+    keys:
+      values:
+        - testkey
+        - home
+        - cluster_persisted_key
+
+  - selector:
+      cluster: cluster2
+    keys:
+      values:
+        - metrics
+        - myList
+      regex: ".*persisted.*"
diff --git a/examples/filters/env=dev/env.yaml b/examples/filters/env=dev/env.yaml
new file mode 100644
index 0000000..44389fb
--- /dev/null
+++ b/examples/filters/env=dev/env.yaml
@@ -0,0 +1,6 @@
+env: dev
+persisted_key: &persisted persisted key
+dropped_key: &dropped object will be filtered out
+persisted_key_referenced: *persisted
+persisted_key_to_drop: *dropped
+persisted_key_to_drop2: *dropped
diff --git a/examples/filters/env=dev/region=us-east-1/cluster=cluster1/cluster.yaml b/examples/filters/env=dev/region=us-east-1/cluster=cluster1/cluster.yaml
new file mode 100644
index 0000000..af86b70
--- /dev/null
+++ b/examples/filters/env=dev/region=us-east-1/cluster=cluster1/cluster.yaml
@@ -0,0 +1,21 @@
+cluster: cluster1
+
+testkey: |-
+  # Set to true to log user information returned from LDAP
+  verbose_logging = true
+
+  [[servers]]
+  # Ldap server host
+  host = "someaddress"
+
+  # Default port is 389 or 636 if use_ssl = true
+  port = 389
+
+  start_tls = true
+
+cluster_persisted_key: this object will be persisted
+cluster_filtered_key: this object will be filtered out
+cluster_persisted_list: "{{ myList }}"
+cluster_persisted_object:
+  cluster_info: "{{ cluster_info }}"
+  cluster_list: "{{ myList }}"
diff --git a/examples/filters/env=dev/region=us-east-1/cluster=cluster2/cluster.yaml b/examples/filters/env=dev/region=us-east-1/cluster=cluster2/cluster.yaml
new file mode 100644
index 0000000..2a37034
--- /dev/null
+++ b/examples/filters/env=dev/region=us-east-1/cluster=cluster2/cluster.yaml
@@ -0,0 +1,23 @@
+cluster: cluster2
+cluster_metrics:
+  - id: 1
+    metric: cpu
+    value: 95
+  - id: 2
+    metric: memory
+    value: 95
+  - id: 3
+    metric: disk
+    remove: True
+  - metric: exec
+    value: 5
+metrics:
+  - cpu
+  - exec
+myList:
+  - id1
+  - id2
+  - id3
+persisted_key: this object will be persisted
+dropped_key: this object will be dropped
+another_persisted_key: this object will also be persisted
diff --git a/examples/filters/env=dev/region=us-east-1/region.yaml b/examples/filters/env=dev/region=us-east-1/region.yaml
new file mode 100644
index 0000000..bfc3ab4
--- /dev/null
+++ b/examples/filters/env=dev/region=us-east-1/region.yaml
@@ -0,0 +1 @@
+region: us-east-1
diff --git a/examples/filters/env=dev/region=us-west-2/cluster=cluster1/cluster.yaml b/examples/filters/env=dev/region=us-west-2/cluster=cluster1/cluster.yaml
new file mode 100644
index 0000000..9fbe122
--- /dev/null
+++ b/examples/filters/env=dev/region=us-west-2/cluster=cluster1/cluster.yaml
@@ -0,0 +1,2 @@
+cluster: cluster1
+home: "{{env(HOME)}}"
diff --git a/examples/filters/env=dev/region=us-west-2/region.yaml b/examples/filters/env=dev/region=us-west-2/region.yaml
new file mode 100644
index 0000000..9b40416
--- /dev/null
+++ b/examples/filters/env=dev/region=us-west-2/region.yaml
@@ -0,0 +1 @@
+region: us-west-2
diff --git a/examples/filters/env=prod/env.yaml b/examples/filters/env=prod/env.yaml
new file mode 100644
index 0000000..2fdef9a
--- /dev/null
+++ b/examples/filters/env=prod/env.yaml
@@ -0,0 +1 @@
+env: prod
diff --git a/examples/filters/env=prod/region=eu-west-2/cluster=ireland1/cluster.yaml b/examples/filters/env=prod/region=eu-west-2/cluster=ireland1/cluster.yaml
new file mode 100644
index 0000000..6338928
--- /dev/null
+++ b/examples/filters/env=prod/region=eu-west-2/cluster=ireland1/cluster.yaml
@@ -0,0 +1,3 @@
+cluster: ireland1
+
+file: "{{cwd}}/test.txt"
diff --git a/examples/filters/env=prod/region=eu-west-2/region.yaml b/examples/filters/env=prod/region=eu-west-2/region.yaml
new file mode 100644
index 0000000..f3e650d
--- /dev/null
+++ b/examples/filters/env=prod/region=eu-west-2/region.yaml
@@ -0,0 +1 @@
+region: eu-west-2
diff --git a/himl/config_merger.py b/himl/config_merger.py
old mode 100644
new mode 100755
index 670d406..1bb53c9
--- a/himl/config_merger.py
+++ b/himl/config_merger.py
@@ -16,7 +16,7 @@ import yaml
 
 from .config_generator import ConfigProcessor
 from multiprocessing import Pool, cpu_count
-
+from .filter_rules import FilterRules
 
 logger = logging.getLogger(__name__)
 
@@ -71,7 +71,7 @@ def __traverse_path(self, path: str, yaml_dict: dict):
                                 current_key, yaml_dict))
 
 
-def merge_configs(directories, levels, output_dir, enable_parallel):
+def merge_configs(directories, levels, output_dir, enable_parallel, filter_rules):
     """
     Method for running the merge configuration logic under different formats
     :param directories: list of paths for leaf directories
@@ -82,7 +82,7 @@
     config_processor = ConfigProcessor()
     process_config = []
     for path in directories:
-        process_config.append((config_processor, path, levels, output_dir))
+        process_config.append((config_processor, path, levels, output_dir, filter_rules))
 
     if enable_parallel:
         logger.info("Processing config in parallel")
@@ -102,6 +102,7 @@ def merge_logic(process_params):
     path = process_params[1]
     levels = process_params[2]
     output_dir = process_params[3]
+    filter_rules = process_params[4]
 
     # load the !include tag
     Loader.add_constructor('!include', Loader.include)
@@ -121,6 +122,12 @@
     if not os.path.exists(publish_path):
         os.makedirs(publish_path)
 
+    if filter_rules:
+        if filter_rules not in output:
+            raise Exception("Filter rule key '{}' not found in config".format(filter_rules))
+        filter = FilterRules(output[filter_rules], levels)
+        filter.run(output)
+
     # create the yaml file for output using the publish_path and last level_values element
     filename = "{0}/{1}.yaml".format(publish_path, level_values[-1])
     logger.info("Found input config directory: %s", path)
@@ -171,6 +178,8 @@
                         help='leaf directories, for instance: cluster', required=True)
     parser.add_argument('--enable-parallel', dest='enable_parallel', default=False, action='store_true',
                         help='Process config using multiprocessing')
+    parser.add_argument('--filter-rules-key', dest='filter_rules', default=None, type=str,
+                        help='keep these keys from the generated data, based on the configured filter key')
 
     return parser.parse_args(args)
 
@@ -182,4 +191,4 @@ def run(args=None):
 
     # merge the configs using HIML
     merge_configs(dirs, opts.hierarchy_levels,
-                  opts.output_dir, opts.enable_parallel)
+                  opts.output_dir, opts.enable_parallel, opts.filter_rules)
diff --git a/himl/filter_rules.py b/himl/filter_rules.py
new file mode 100644
index 0000000..6776ea7
--- /dev/null
+++ b/himl/filter_rules.py
@@ -0,0 +1,37 @@
+import re
+
+
+class FilterRules(object):
+
+    def __init__(self, rules, levels):
+        self.rules = rules
+        self.levels = levels
+
+    def run(self, output):
+
+        removable_keys = set(output.keys()) - set(self.levels)
+
+        for filter in self.rules:
+            selector = filter.get("selector", {})
+            if type(selector) != dict:
+                raise Exception("Filter selector must be a dictionary")
+
+            if not self.match(output, selector):
+                continue
+
+            keys = filter.get("keys")
+            if "values" in keys:
+                removable_keys = removable_keys - set(keys["values"])
+            if "regex" in keys:
+                key_re = re.compile(keys["regex"])
+                removable_keys = {k for k in removable_keys if not key_re.match(k)}
+
+        for key in removable_keys:
+            del output[key]
+
+    def match(self, output, selector):
+        for key, pattern in selector.items():
+            value = "" if key not in output else output[key]
+            if not re.match(pattern, value):
+                return False
+        return True
diff --git a/himl/main.py b/himl/main.py
index 2eeb400..904a2b3 100644
--- a/himl/main.py
+++ b/himl/main.py
@@ -37,8 +37,8 @@ def do_run(self, opts):
             opts.print_data = True
 
         config_processor = ConfigProcessor()
-
-        config_processor.process(cwd, opts.path, filters, excluded_keys, opts.enclosing_key, opts.remove_enclosing_key,
+
+        config_processor.process(cwd, opts.path, filters, filter_config, excluded_keys, opts.enclosing_key, opts.remove_enclosing_key,
                                  opts.output_format, opts.print_data, opts.output_file, opts.skip_interpolation_resolving,
                                  opts.skip_interpolation_validation, opts.skip_secrets, opts.multi_line_string,
                                  type_strategies= [(list, [opts.merge_list_strategy.value]), (dict, ["merge"])] )
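
Reviewer note (not part of the patch): the snippet below is a minimal sketch of how the new `FilterRules` class, as wired into `merge_logic` above, behaves on a hand-made dict instead of a merged config tree. It assumes himl is installed from this branch so that `himl.filter_rules` is importable; the `levels`, `output`, and `rules` values are illustrative only.

```python
# Minimal, self-contained exercise of himl.filter_rules.FilterRules.
# All data below is made up for illustration; only the FilterRules API
# (constructor taking the rules list and the level keys, run() mutating
# the dict in place) comes from the patch above.
from himl.filter_rules import FilterRules

levels = ["env", "region", "cluster"]  # level keys are never removed

# Simulated merged output for one leaf directory.
output = {
    "env": "dev",
    "region": "us-east-1",
    "cluster": "cluster1",
    "key1": "persisted",
    "key2": "dropped",
    "keep_1": "persisted",
}

# Same shape as the list stored under the --filter-rules-key entry.
rules = [
    {
        "selector": {"env": "dev"},                        # regex matched against output["env"]
        "keys": {"values": ["key1"], "regex": "keep_.*"},  # keys to keep when the selector matches
    },
]

FilterRules(rules, levels).run(output)  # removes every non-level key that no rule kept

print(sorted(output))  # ['cluster', 'env', 'keep_1', 'key1', 'region']
```

The same behaviour can be observed end to end by running the `himl-config-merger` command shown in the README section above against `examples/filters`.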