From 9b158cef28513797ad99d3b1cc1e338715a29911 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Wed, 15 Jan 2025 16:32:50 -0800 Subject: [PATCH 1/4] initial commit; migrated integration testing to RBA structures; littered code w/ comments for cleanup before merge --- .../DetectionTestingInfrastructure.py | 8 +- .../detection_abstract.py | 1 + contentctl/objects/constants.py | 3 + contentctl/objects/correlation_search.py | 137 +++++------ contentctl/objects/detection_tags.py | 9 +- contentctl/objects/drilldown.py | 1 + contentctl/objects/observable.py | 1 + contentctl/objects/rba.py | 1 + contentctl/objects/risk_event.py | 223 +++++++----------- 9 files changed, 175 insertions(+), 209 deletions(-) diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 7804b29e..42cad6c0 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -1094,6 +1094,7 @@ def retry_search_until_timeout( job = self.get_conn().search(query=search, **kwargs) results = JSONResultsReader(job.results(output_mode="json")) + # TODO (cmcginley): @ljstella you're removing this ultimately, right? # Consolidate a set of the distinct observable field names observable_fields_set = set([o.name for o in detection.tags.observable]) # keeping this around for later risk_object_fields_set = set([o.name for o in detection.tags.observable if "Victim" in o.role ]) # just the "Risk Objects" @@ -1121,7 +1122,10 @@ def retry_search_until_timeout( missing_risk_objects = risk_object_fields_set - results_fields_set if len(missing_risk_objects) > 0: # Report a failure in such cases - e = Exception(f"The observable field(s) {missing_risk_objects} are missing in the detection results") + e = Exception( + f"The risk object field(s) {missing_risk_objects} are missing in the " + "detection results" + ) test.result.set_job_content( job.content, self.infrastructure, @@ -1137,6 +1141,8 @@ def retry_search_until_timeout( # on a field. In this case, the field will appear but will not contain any values current_empty_fields: set[str] = set() + # TODO (cmcginley): @ljstella is this something we're keeping for testing as + # well? for field in observable_fields_set: if result.get(field, 'null') == 'null': if field in risk_object_fields_set: diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 81297df9..7d2b0386 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -279,6 +279,7 @@ def source(self) -> str: deployment: Deployment = Field({}) + # TODO (cmcginley): @ljstella removing the refs to confidence and impact? @computed_field @property def annotations(self) -> dict[str, Union[List[str], int, str]]: diff --git a/contentctl/objects/constants.py b/contentctl/objects/constants.py index 8a32f28d..f0f0d8f3 100644 --- a/contentctl/objects/constants.py +++ b/contentctl/objects/constants.py @@ -79,6 +79,7 @@ "Actions on Objectives": 7 } +# TODO (cmcginley): @ljstella should this be removed? 
also referenced in new_content.py SES_OBSERVABLE_ROLE_MAPPING = { "Other": -1, "Unknown": 0, @@ -93,6 +94,7 @@ "Observer": 9 } +# TODO (cmcginley): @ljstella should this be removed? also referenced in new_content.py SES_OBSERVABLE_TYPE_MAPPING = { "Unknown": 0, "Hostname": 1, @@ -135,6 +137,7 @@ "Impact": "TA0040" } +# TODO (cmcginley): is this just for the transition testing? RBA_OBSERVABLE_ROLE_MAPPING = { "Attacker": 0, "Victim": 1 diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index 2a8d1e9c..b0cffe4e 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -29,7 +29,6 @@ from contentctl.objects.detection import Detection from contentctl.objects.risk_event import RiskEvent from contentctl.objects.notable_event import NotableEvent -from contentctl.objects.observable import Observable # Suppress logging by default; enable for local testing @@ -145,24 +144,24 @@ def __init__(self, response_reader: ResponseReader) -> None: def __iter__(self) -> "ResultIterator": return self - def __next__(self) -> dict: + def __next__(self) -> dict[Any, Any]: # Use a reader for JSON format so we can iterate over our results for result in self.results_reader: # log messages, or raise if error if isinstance(result, Message): # convert level string to level int - level_name = result.type.strip().upper() + level_name = result.type.strip().upper() # type: ignore level: int = logging.getLevelName(level_name) # log message at appropriate level and raise if needed - message = f"SPLUNK: {result.message}" + message = f"SPLUNK: {result.message}" # type: ignore self.logger.log(level, message) if level == logging.ERROR: raise ServerError(message) # if dict, just return elif isinstance(result, dict): - return result + return result # type: ignore # raise for any unexpected types else: @@ -310,9 +309,11 @@ def earliest_time(self) -> str: The earliest time configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY] + return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def latest_time(self) -> str: @@ -320,9 +321,11 @@ def latest_time(self) -> str: The latest time configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY] + return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def cron_schedule(self) -> str: @@ -330,9 +333,11 @@ def cron_schedule(self) -> str: The cron schedule configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY] + return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def enabled(self) -> bool: @@ 
-340,12 +345,14 @@ def enabled(self) -> bool: Whether the saved search is enabled """ if self.saved_search is not None: - if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY]): + if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY]): # type: ignore return False else: return True else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @ property def has_risk_analysis_action(self) -> bool: @@ -387,19 +394,6 @@ def _get_notable_action(content: dict[str, Any]) -> NotableAction | None: return NotableAction.parse_from_dict(content) return None - @staticmethod - def _get_relevant_observables(observables: list[Observable]) -> list[Observable]: - """ - Given a list of observables, identify the subset of those relevant for risk matching - :param observables: the Observable objects to filter - :returns: the filtered list of relevant observables - """ - relevant = [] - for observable in observables: - if not RiskEvent.ignore_observable(observable): - relevant.append(observable) - return relevant - def _parse_risk_and_notable_actions(self) -> None: """Parses the risk/notable metadata we care about from self.saved_search.content @@ -495,7 +489,7 @@ def update_timeframe( if refresh: self.refresh() - def force_run(self, refresh=True) -> None: + def force_run(self, refresh: bool = True) -> None: """Forces a detection run Enables the detection, adjusts the cron schedule to run every 1 minute, and widens the earliest/latest window @@ -506,7 +500,7 @@ def force_run(self, refresh=True) -> None: if not self.enabled: self.enable(refresh=False) else: - self.logger.warn(f"Detection '{self.name}' was already enabled") + self.logger.warning(f"Detection '{self.name}' was already enabled") if refresh: self.refresh() @@ -557,7 +551,7 @@ def get_risk_events(self, force_update: bool = False) -> list[RiskEvent]: if result["index"] == Indexes.RISK_INDEX: try: parsed_raw = json.loads(result["_raw"]) - event = RiskEvent.parse_obj(parsed_raw) + event = RiskEvent.model_validate(parsed_raw) except Exception: self.logger.error(f"Failed to parse RiskEvent from search result: {result}") raise @@ -622,7 +616,7 @@ def get_notable_events(self, force_update: bool = False) -> list[NotableEvent]: if result["index"] == Indexes.NOTABLE_INDEX: try: parsed_raw = json.loads(result["_raw"]) - event = NotableEvent.parse_obj(parsed_raw) + event = NotableEvent.model_validate(parsed_raw) except Exception: self.logger.error(f"Failed to parse NotableEvent from search result: {result}") raise @@ -646,22 +640,26 @@ def validate_risk_events(self) -> None: """Validates the existence of any expected risk events First ensure the risk event exists, and if it does validate its risk message and make sure - any events align with the specified observables. Also adds the risk index to the purge list + any events align with the specified risk object. 
Also adds the risk index to the purge list if risk events existed :param elapsed_sleep_time: an int representing the amount of time slept thus far waiting to check the risks/notables :returns: an IntegrationTestResult on failure; None on success """ - # Create a mapping of the relevant observables to counters - observables = CorrelationSearch._get_relevant_observables(self.detection.tags.observable) - observable_counts: dict[str, int] = {str(x): 0 for x in observables} + # Ensure the rba object is defined + if self.detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{self.detection.name}' has no RBA objects associated" + " with it; cannot validate." + ) + risk_object_counts: dict[str, int] = {str(x): 0 for x in self.detection.rba.risk_objects} # NOTE: we intentionally want this to be an error state and not a failure state, as # ultimately this validation should be handled during the build process - if len(observables) != len(observable_counts): + if len(self.detection.rba.risk_objects) != len(risk_object_counts): raise ClientError( - f"At least two observables in '{self.detection.name}' have the same name; " - "each observable for a detection should be unique." + f"At least two risk objects in '{self.detection.name}' have the same name; " + "each risk object for a detection should be unique." ) # Get the risk events; note that we use the cached risk events, expecting they were @@ -678,58 +676,61 @@ def validate_risk_events(self) -> None: ) event.validate_against_detection(self.detection) - # Update observable count based on match - matched_observable = event.get_matched_observable(self.detection.tags.observable) + # Update risk object count based on match + matched_risk_object = event.get_matched_risk_object(self.detection.rba.risk_objects) self.logger.debug( f"Matched risk event (object={event.risk_object}, type={event.risk_object_type}) " - f"to observable (name={matched_observable.name}, type={matched_observable.type}, " - f"role={matched_observable.role}) using the source field " + f"to detection's risk object (name={matched_risk_object.field}, " + f"type={matched_risk_object.type.value} using the source field " f"'{event.source_field_name}'" ) - observable_counts[str(matched_observable)] += 1 + risk_object_counts[str(matched_risk_object)] += 1 - # Report any observables which did not have at least one match to a risk event - for observable in observables: + # Report any risk objects which did not have at least one match to a risk event + for risk_object in self.detection.rba.risk_objects: self.logger.debug( - f"Matched observable (name={observable.name}, type={observable.type}, " - f"role={observable.role}) to {observable_counts[str(observable)]} risk events." + f"Matched risk object (name={risk_object.field}, type={risk_object.type.value} " + f"to {risk_object_counts[str(risk_object)]} risk events." ) - if observable_counts[str(observable)] == 0: + if risk_object_counts[str(risk_object)] == 0: raise ValidationFailed( - f"Observable (name={observable.name}, type={observable.type}, " - f"role={observable.role}) was not matched to any risk events." + f"Risk object (name={risk_object.field}, type={risk_object.type.value} " + "was not matched to any risk events." 
) # TODO (#250): Re-enable and refactor code that validates the specific risk counts # Validate risk events in aggregate; we should have an equal amount of risk events for each - # relevant observable, and the total count should match the total number of events + # relevant risk object, and the total count should match the total number of events # individual_count: int | None = None # total_count = 0 - # for observable_str in observable_counts: + # for risk_object_str in risk_object_counts: # self.logger.debug( - # f"Observable <{observable_str}> match count: {observable_counts[observable_str]}" + # f"Risk object <{risk_object_str}> match count: {risk_object_counts[risk_object_str]}" # ) # # Grab the first value encountered if not set yet # if individual_count is None: - # individual_count = observable_counts[observable_str] + # individual_count = risk_object_counts[risk_object_str] # else: - # # Confirm that the count for the current observable matches the count of the others - # if observable_counts[observable_str] != individual_count: + # # Confirm that the count for the current risk object matches the count of the + # # others + # if risk_object_counts[risk_object_str] != individual_count: # raise ValidationFailed( - # f"Count of risk events matching observable <\"{observable_str}\"> " - # f"({observable_counts[observable_str]}) does not match the count of those " - # f"matching other observables ({individual_count})." + # f"Count of risk events matching detection's risk object <\"{risk_object_str}\"> " + # f"({risk_object_counts[risk_object_str]}) does not match the count of those " + # f"matching other risk objects ({individual_count})." # ) - # # Aggregate total count of events matched to observables - # total_count += observable_counts[observable_str] + # # Aggregate total count of events matched to risk objects + # total_count += risk_object_counts[risk_object_str] - # # Raise if the the number of events doesn't match the number of those matched to observables + # # Raise if the the number of events doesn't match the number of those matched to risk + # # objects # if len(events) != total_count: # raise ValidationFailed( # f"The total number of risk events {len(events)} does not match the number of " - # f"risk events we were able to match against observables ({total_count})." + # "risk events we were able to match against risk objects from the detection " + # f"({total_count})." # ) # TODO (PEX-434): implement deeper notable validation @@ -783,11 +784,11 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP, raise_on_exc: bool = Fa ) self.update_pbar(TestingStates.PRE_CLEANUP) if self.risk_event_exists(): - self.logger.warn( + self.logger.warning( f"Risk events matching '{self.name}' already exist; marking for deletion" ) if self.notable_event_exists(): - self.logger.warn( + self.logger.warning( f"Notable events matching '{self.name}' already exist; marking for deletion" ) self.cleanup() @@ -934,11 +935,11 @@ def _search(self, query: str) -> ResultIterator: :param query: the SPL string to run """ self.logger.debug(f"Executing query: `{query}`") - job = self.service.search(query, exec_mode="blocking") + job = self.service.search(query, exec_mode="blocking") # type: ignore # query the results, catching any HTTP status code errors try: - response_reader: ResponseReader = job.results(output_mode="json") + response_reader: ResponseReader = job.results(output_mode="json") # type: ignore except HTTPError as e: # e.g. 
-> HTTP 400 Bad Request -- b'{"messages":[{"type":"FATAL","text":"Error in \'delete\' command: You # have insufficient privileges to delete events."}]}' @@ -946,7 +947,7 @@ def _search(self, query: str) -> ResultIterator: self.logger.error(message) raise ServerError(message) - return ResultIterator(response_reader) + return ResultIterator(response_reader) # type: ignore def _delete_index(self, index: str) -> None: """Deletes events in a given index @@ -979,7 +980,7 @@ def _delete_index(self, index: str) -> None: message = f"No result returned showing deletion in index {index}" raise ServerError(message) - def cleanup(self, delete_test_index=False) -> None: + def cleanup(self, delete_test_index: bool = False) -> None: """Cleans up after an integration test First, disable the detection; then dump the risk, notable, and (optionally) test indexes. The test index is diff --git a/contentctl/objects/detection_tags.py b/contentctl/objects/detection_tags.py index c30dc453..2a549468 100644 --- a/contentctl/objects/detection_tags.py +++ b/contentctl/objects/detection_tags.py @@ -42,13 +42,17 @@ class DetectionTags(BaseModel): analytic_story: list[Story] = Field(...) asset_type: AssetType = Field(...) group: list[str] = [] + + # TODO (cmcginley): should confidence, impact and the risk_score property be removed? confidence: NonNegativeInt = Field(...,le=100) impact: NonNegativeInt = Field(...,le=100) @computed_field @property def risk_score(self) -> int: return round((self.confidence * self.impact)/100) - + + # TODO (cmcginley): @ljstella what's happening w/ severity, given it's use of the top-level + # risk_score? @computed_field @property def severity(self)->RiskSeverity: @@ -69,6 +73,7 @@ def severity(self)->RiskSeverity: mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = [] nist: list[NistCategory] = [] + # TODO (cmcginley): observable should be removed as well, yes? # TODO (#249): Add pydantic validator to ensure observables are unique within a detection observable: List[Observable] = [] product: list[SecurityContentProductName] = Field(..., min_length=1) @@ -76,7 +81,6 @@ def severity(self)->RiskSeverity: security_domain: SecurityDomain = Field(...) cve: List[CVE_TYPE] = [] atomic_guid: List[AtomicTest] = [] - # enrichment mitre_attack_enrichments: List[MitreAttackEnrichment] = Field([], validate_default=True) @@ -144,6 +148,7 @@ def cis20(self) -> list[Cis18Value]: # ) # return v + # TODO (cmcginley): @ljstella removing risk_score and severity from serialization? 
@model_serializer def serialize_model(self): # Since this field has no parent, there is no need to call super() serialization function diff --git a/contentctl/objects/drilldown.py b/contentctl/objects/drilldown.py index 3fe41e7c..b5604748 100644 --- a/contentctl/objects/drilldown.py +++ b/contentctl/objects/drilldown.py @@ -23,6 +23,7 @@ class Drilldown(BaseModel): "but it is NOT the default value and must be supplied explicitly.", min_length= 1) + # TODO (cmcginley): @ljstella the drilldowns will need to be updated @classmethod def constructDrilldownsFromDetection(cls, detection: Detection) -> list[Drilldown]: victim_observables = [o for o in detection.tags.observable if o.role[0] == "Victim"] diff --git a/contentctl/objects/observable.py b/contentctl/objects/observable.py index 81b04922..35eb535a 100644 --- a/contentctl/objects/observable.py +++ b/contentctl/objects/observable.py @@ -1,6 +1,7 @@ from pydantic import BaseModel, field_validator, ConfigDict from contentctl.objects.constants import SES_OBSERVABLE_TYPE_MAPPING, RBA_OBSERVABLE_ROLE_MAPPING +# TODO (cmcginley): should this class be removed? class Observable(BaseModel): model_config = ConfigDict(extra="forbid") diff --git a/contentctl/objects/rba.py b/contentctl/objects/rba.py index 1f9a6c31..ad25ab46 100644 --- a/contentctl/objects/rba.py +++ b/contentctl/objects/rba.py @@ -38,6 +38,7 @@ class ThreatObjectType(str, Enum): TLS_HASH = "tls_hash" URL = "url" +# TODO (cmcginley): class names should be capitalized class risk_object(BaseModel): field: str type: RiskObjectType diff --git a/contentctl/objects/risk_event.py b/contentctl/objects/risk_event.py index de98bd0b..cf9f9ea8 100644 --- a/contentctl/objects/risk_event.py +++ b/contentctl/objects/risk_event.py @@ -4,50 +4,10 @@ from pydantic import ConfigDict, BaseModel, Field, PrivateAttr, field_validator, computed_field from contentctl.objects.errors import ValidationFailed from contentctl.objects.detection import Detection -from contentctl.objects.observable import Observable - -# TODO (#259): Map our observable types to more than user/system -# TODO (#247): centralize this mapping w/ usage of SES_OBSERVABLE_TYPE_MAPPING (see -# observable.py) and the ad hoc mapping made in detection_abstract.py (see the risk property func) -TYPE_MAP: dict[str, list[str]] = { - "system": [ - "Hostname", - "IP Address", - "Endpoint" - ], - "user": [ - "User", - "User Name", - "Email Address", - "Email" - ], - "hash_values": [], - "network_artifacts": [], - "host_artifacts": [], - "tools": [], - "other": [ - "Process", - "URL String", - "Unknown", - "Process Name", - "MAC Address", - "File Name", - "File Hash", - "Resource UID", - "Uniform Resource Locator", - "File", - "Geo Location", - "Container", - "Registry Key", - "Registry Value", - "Other" - ] -} - -# Roles that should not generate risks -IGNORE_ROLES: list[str] = ["Attacker"] +from contentctl.objects.rba import risk_object as RiskObject +# TODO (cmcginley): the names below now collide a bit with Lou's new class names class RiskEvent(BaseModel): """Model for risk event in ES""" @@ -79,11 +39,11 @@ class RiskEvent(BaseModel): ) # Contributing events search query (we use this to derive the corresponding field from the - # observables) + # detection's risk object definition) contributing_events_search: str - # Private attribute caching the observable this RiskEvent is mapped to - _matched_observable: Observable | None = PrivateAttr(default=None) + # Private attribute caching the risk object this RiskEvent is mapped to + _matched_risk_object: 
RiskObject | None = PrivateAttr(default=None) # Allowing fields that aren't explicitly defined to be passed since some of the risk event's # fields vary depending on the SPL which generated them @@ -108,7 +68,7 @@ def _convert_str_value_to_singleton(cls, v: str | list[str]) -> list[str]: def source_field_name(self) -> str: """ A cached derivation of the source field name the risk event corresponds to in the relevant - event(s). Useful for mapping back to an observable in the detection. + event(s). Useful for mapping back to a risk object in the detection. """ pattern = re.compile( r"\| savedsearch \"" + self.search_name + r"\" \| search (?P[^=]+)=.+" @@ -128,13 +88,6 @@ def validate_against_detection(self, detection: Detection) -> None: :param detection: the detection associated w/ this risk event :raises: ValidationFailed """ - # Check risk_score - if self.risk_score != detection.tags.risk_score: - raise ValidationFailed( - f"Risk score observed in risk event ({self.risk_score}) does not match risk score in " - f"detection ({detection.tags.risk_score})." - ) - # Check analyticstories self.validate_analyticstories(detection) @@ -151,8 +104,15 @@ def validate_against_detection(self, detection: Detection) -> None: # Check risk_message self.validate_risk_message(detection) - # Check several conditions against the observables - self.validate_risk_against_observables(detection.tags.observable) + # Ensure the rba object is defined + if detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{detection.name}' has no RBA objects associated " + "with it; cannot validate." + ) + + # Check several conditions against the detection's risk objects + self.validate_risk_against_risk_objects(detection.rba.risk_objects) def validate_mitre_ids(self, detection: Detection) -> None: """ @@ -180,15 +140,25 @@ def validate_analyticstories(self, detection: Detection) -> None: f" in detection ({detection.tags.analytic_story})." ) + # TODO (cmcginley): all of this type checking is a good use case (potentially) for subtyping + # detections by detection type, instead of having types as enums; could have an EBD subtype + # for any detections that should produce risk so that rba is never None def validate_risk_message(self, detection: Detection) -> None: """ Given the associated detection, validate the risk event's message :param detection: the detection associated w/ this risk event :raises: ValidationFailed """ + # Ensure the rba object is defined + if detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{detection.name}' has no RBA objects associated " + "with it; cannot validate." + ) + # Extract the field replacement tokens ("$...$") field_replacement_pattern = re.compile(r"\$\S+\$") - tokens = field_replacement_pattern.findall(detection.tags.message) + tokens = field_replacement_pattern.findall(detection.rba.message) # Check for the presence of each token in the message from the risk event for token in tokens: @@ -205,7 +175,7 @@ def validate_risk_message(self, detection: Detection) -> None: escaped_source_message_with_placeholder: str = re.escape( field_replacement_pattern.sub( tmp_placeholder, - detection.tags.message + detection.rba.message ) ) placeholder_replacement_pattern = re.compile(tmp_placeholder) @@ -221,114 +191,91 @@ def validate_risk_message(self, detection: Detection) -> None: raise ValidationFailed( "Risk message in event does not match the pattern set by the detection. Message in " f"risk event: \"{self.risk_message}\". 
Message in detection: " - f"\"{detection.tags.message}\"." + f"\"{detection.rba.message}\"." ) - def validate_risk_against_observables(self, observables: list[Observable]) -> None: + def validate_risk_against_risk_objects(self, risk_objects: set[RiskObject]) -> None: """ - Given the observables from the associated detection, validate the risk event against those - observables - :param observables: the Observable objects from the detection + Given the risk objects from the associated detection, validate the risk event against those + risk objects + :param risk_objects: the risk objects from the detection :raises: ValidationFailed """ - # Get the matched observable; will raise validation errors if no match can be made or if - # risk is missing values associated w/ observables - matched_observable = self.get_matched_observable(observables) + # Get the matched risk object; will raise validation errors if no match can be made or if + # risk is missing values associated w/ risk objects + matched_risk_object = self.get_matched_risk_object(risk_objects) - # The risk object type should match our mapping of observable types to risk types - expected_type = RiskEvent.observable_type_to_risk_type(matched_observable.type) - if self.risk_object_type != expected_type: + # The risk object type from the risk event should match our mapping of internal risk object + # types + if self.risk_object_type != matched_risk_object.type.value: raise ValidationFailed( - f"The risk object type ({self.risk_object_type}) does not match the expected type " - f"based on the matched observable ({matched_observable.type}->{expected_type}): " - f"risk=(object={self.risk_object}, type={self.risk_object_type}, " - f"source_field_name={self.source_field_name}), " - f"observable=(name={matched_observable.name}, type={matched_observable.type}, " - f"role={matched_observable.role})" + f"The risk object type from the risk event ({self.risk_object_type}) does not match" + " the expected type based on the matched risk object " + f"({matched_risk_object.type.value}): risk event=(object={self.risk_object}, " + f"type={self.risk_object_type}, source_field_name={self.source_field_name}), " + f"risk object=(name={matched_risk_object.field}, " + f"type={matched_risk_object.type.value})" ) - @staticmethod - def observable_type_to_risk_type(observable_type: str) -> str: - """ - Given a string representing the observable type, use our mapping to convert it to the - expected type in the risk event - :param observable_type: the type of the observable - :returns: a string (the risk object type) - :raises ValueError: if the observable type has not yet been mapped to a risk object type - """ - # Iterate over the map and search the lists for a match - for risk_type in TYPE_MAP: - if observable_type in TYPE_MAP[risk_type]: - return risk_type - - raise ValueError( - f"Observable type {observable_type} does not have a mapping to a risk type in TYPE_MAP" - ) + # Check risk_score + if self.risk_score != matched_risk_object.score: + raise ValidationFailed( + f"Risk score observed in risk event ({self.risk_score}) does not match risk score in " + f"matched risk object from detection ({matched_risk_object.score})." + ) - @staticmethod - def ignore_observable(observable: Observable) -> bool: - """ - Given an observable, determine based on its roles if it should be ignored in risk/observable - matching (e.g. 
Attacker role observables should not generate risk events) - :param observable: the Observable object we are checking the roles of - :returns: a bool indicating whether this observable should be ignored or not + def get_matched_risk_object(self, risk_objects: set[RiskObject]) -> RiskObject: """ - ignore = False - for role in observable.role: - if role in IGNORE_ROLES: - ignore = True - break - return ignore - - def get_matched_observable(self, observables: list[Observable]) -> Observable: - """ - Given a list of observables, return the one this risk event matches - :param observables: the list of Observable objects we are checking against - :returns: the matched Observable object + Given a set of risk objects, return the one this risk event matches + :param risk_objects: the list of risk objects we are checking against + :returns: the matched risk object :raises ValidationFailed: if a match could not be made or if an expected field (based on - one of the observables) could not be found in the risk event + one of the risk objects) could not be found in the risk event """ # Return the cached match if already found - if self._matched_observable is not None: - return self._matched_observable + if self._matched_risk_object is not None: + return self._matched_risk_object - matched_observable: Observable | None = None + matched_risk_object: RiskObject | None = None # Iterate over the obervables and check for a match - for observable in observables: + for risk_object in risk_objects: # TODO (#252): Refactor and re-enable per-field validation of risk events - # Each the field name used in each observable shoud be present in the risk event - # if not hasattr(self, observable.name): + # Each the field name used in each risk object shoud be present in the risk event + # if not hasattr(self, risk_object.field): # raise ValidationFailed( - # f"Observable field \"{observable.name}\" not found in risk event." + # f"Risk object field \"{risk_object.field}\" not found in risk event." # ) - # Try to match the risk_object against a specific observable for the obervables with - # a valid role (some, like Attacker, shouldn't get converted to risk events) - if self.source_field_name == observable.name: - if matched_observable is not None: + # Try to match the risk_object against a specific risk object + if self.source_field_name == risk_object.field: + if matched_risk_object is not None: raise ValueError( "Unexpected conditon: we don't expect the source event field " - "corresponding to an observables field name to be repeated." + "corresponding to an risk object's field name to be repeated." ) - # Report any risk events we find that shouldn't be there - if RiskEvent.ignore_observable(observable): - raise ValidationFailed( - "Risk event matched an observable with an invalid role: " - f"(name={observable.name}, type={observable.type}, role={observable.role})") - # NOTE: we explicitly do not break early as we want to check each observable - matched_observable = observable + # TODO (cmcginley): risk objects and threat objects are now in separate sets, + # so I think we can safely eliminate this check entirely? 
+ # # Report any risk events we find that shouldn't be there + # if RiskEvent.ignore_observable(observable): + # raise ValidationFailed( + # "Risk event matched an observable with an invalid role: " + # f"(name={observable.name}, type={observable.type}, role={observable.role})") + + # NOTE: we explicitly do not break early as we want to check each risk object + matched_risk_object = risk_object - # Ensure we were able to match the risk event to a specific observable - if matched_observable is None: + # Ensure we were able to match the risk event to a specific risk object + if matched_risk_object is None: raise ValidationFailed( f"Unable to match risk event (object={self.risk_object}, type=" - f"{self.risk_object_type}, source_field_name={self.source_field_name}) to an " - "observable; please check for errors in the observable roles/types for this " - "detection, as well as the risk event build process in contentctl." + f"{self.risk_object_type}, source_field_name={self.source_field_name}) to a " + "risk object in the detection; please check for errors in the risk object types for this " + "detection, as well as the risk event build process in contentctl (e.g. threat " + "objects aren't being converted to risk objects somehow)." ) - # Cache and return the matched observable - self._matched_observable = matched_observable - return self._matched_observable + # Cache and return the matched risk object + self._matched_risk_object = matched_risk_object + return self._matched_risk_object From 72c51a4c2487c799ad7e859eeb760f1b11cb91b1 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Jan 2025 14:29:06 -0800 Subject: [PATCH 2/4] cleanup; log fixes --- .../detection_abstract.py | 1 - contentctl/objects/correlation_search.py | 36 ++++++++----------- contentctl/objects/detection_tags.py | 8 ++--- contentctl/objects/risk_event.py | 11 ++++-- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index dbaa37e5..8dcf01ae 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -279,7 +279,6 @@ def source(self) -> str: deployment: Deployment = Field({}) - # TODO (cmcginley): @ljstella removing the refs to confidence and impact? @computed_field @property def annotations(self) -> dict[str, Union[List[str], int, str]]: diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index b0cffe4e..b8bc4249 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -31,8 +31,9 @@ from contentctl.objects.notable_event import NotableEvent +# TODO (cmcginley): disable logging # Suppress logging by default; enable for local testing -ENABLE_LOGGING = False +ENABLE_LOGGING = True LOG_LEVEL = logging.DEBUG LOG_PATH = "correlation_search.log" @@ -652,15 +653,8 @@ def validate_risk_events(self) -> None: f"Unexpected error: Detection '{self.detection.name}' has no RBA objects associated" " with it; cannot validate." 
) - risk_object_counts: dict[str, int] = {str(x): 0 for x in self.detection.rba.risk_objects} - # NOTE: we intentionally want this to be an error state and not a failure state, as - # ultimately this validation should be handled during the build process - if len(self.detection.rba.risk_objects) != len(risk_object_counts): - raise ClientError( - f"At least two risk objects in '{self.detection.name}' have the same name; " - "each risk object for a detection should be unique." - ) + risk_object_counts: dict[int, int] = {id(x): 0 for x in self.detection.rba.risk_objects} # Get the risk events; note that we use the cached risk events, expecting they were # saved by a prior call to risk_event_exists @@ -681,20 +675,20 @@ def validate_risk_events(self) -> None: self.logger.debug( f"Matched risk event (object={event.risk_object}, type={event.risk_object_type}) " f"to detection's risk object (name={matched_risk_object.field}, " - f"type={matched_risk_object.type.value} using the source field " + f"type={matched_risk_object.type.value}) using the source field " f"'{event.source_field_name}'" ) - risk_object_counts[str(matched_risk_object)] += 1 + risk_object_counts[id(matched_risk_object)] += 1 # Report any risk objects which did not have at least one match to a risk event for risk_object in self.detection.rba.risk_objects: self.logger.debug( f"Matched risk object (name={risk_object.field}, type={risk_object.type.value} " - f"to {risk_object_counts[str(risk_object)]} risk events." + f"to {risk_object_counts[id(risk_object)]} risk events." ) - if risk_object_counts[str(risk_object)] == 0: + if risk_object_counts[id(risk_object)] == 0: raise ValidationFailed( - f"Risk object (name={risk_object.field}, type={risk_object.type.value} " + f"Risk object (name={risk_object.field}, type={risk_object.type.value}) " "was not matched to any risk events." ) @@ -703,26 +697,26 @@ def validate_risk_events(self) -> None: # relevant risk object, and the total count should match the total number of events # individual_count: int | None = None # total_count = 0 - # for risk_object_str in risk_object_counts: + # for risk_object_id in risk_object_counts: # self.logger.debug( - # f"Risk object <{risk_object_str}> match count: {risk_object_counts[risk_object_str]}" + # f"Risk object <{risk_object_id}> match count: {risk_object_counts[risk_object_id]}" # ) # # Grab the first value encountered if not set yet # if individual_count is None: - # individual_count = risk_object_counts[risk_object_str] + # individual_count = risk_object_counts[risk_object_id] # else: # # Confirm that the count for the current risk object matches the count of the # # others - # if risk_object_counts[risk_object_str] != individual_count: + # if risk_object_counts[risk_object_id] != individual_count: # raise ValidationFailed( - # f"Count of risk events matching detection's risk object <\"{risk_object_str}\"> " - # f"({risk_object_counts[risk_object_str]}) does not match the count of those " + # f"Count of risk events matching detection's risk object <\"{risk_object_id}\"> " + # f"({risk_object_counts[risk_object_id]}) does not match the count of those " # f"matching other risk objects ({individual_count})." 
# ) # # Aggregate total count of events matched to risk objects - # total_count += risk_object_counts[risk_object_str] + # total_count += risk_object_counts[risk_object_id] # # Raise if the the number of events doesn't match the number of those matched to risk # # objects diff --git a/contentctl/objects/detection_tags.py b/contentctl/objects/detection_tags.py index 42800a92..aea02bfe 100644 --- a/contentctl/objects/detection_tags.py +++ b/contentctl/objects/detection_tags.py @@ -4,8 +4,6 @@ from pydantic import ( BaseModel, Field, - NonNegativeInt, - PositiveInt, computed_field, UUID4, HttpUrl, @@ -34,6 +32,7 @@ from contentctl.objects.atomic import AtomicEnrichment, AtomicTest from contentctl.objects.annotated_types import MITRE_ATTACK_ID_TYPE, CVE_TYPE + class DetectionTags(BaseModel): # detection spec @@ -41,7 +40,7 @@ class DetectionTags(BaseModel): analytic_story: list[Story] = Field(...) asset_type: AssetType = Field(...) group: list[str] = [] - + mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = [] nist: list[NistCategory] = [] @@ -84,7 +83,7 @@ def cis20(self) -> list[Cis18Value]: # TODO (#268): Validate manual_test has length > 0 if not None manual_test: Optional[str] = None - + # The following validator is temporarily disabled pending further discussions # @validator('message') # def validate_message(cls,v,values): @@ -117,7 +116,6 @@ def cis20(self) -> list[Cis18Value]: # ) # return v - # TODO (cmcginley): @ljstella removing risk_score and severity from serialization? @model_serializer def serialize_model(self): # Since this field has no parent, there is no need to call super() serialization function diff --git a/contentctl/objects/risk_event.py b/contentctl/objects/risk_event.py index cf9f9ea8..5daa722b 100644 --- a/contentctl/objects/risk_event.py +++ b/contentctl/objects/risk_event.py @@ -137,7 +137,7 @@ def validate_analyticstories(self, detection: Detection) -> None: if sorted(self.analyticstories) != sorted(detection_analytic_story): raise ValidationFailed( f"Analytic stories in risk event ({self.analyticstories}) do not match those" - f" in detection ({detection.tags.analytic_story})." + f" in detection ({[x.name for x in detection.tags.analytic_story]})." ) # TODO (cmcginley): all of this type checking is a good use case (potentially) for subtyping @@ -160,6 +160,9 @@ def validate_risk_message(self, detection: Detection) -> None: field_replacement_pattern = re.compile(r"\$\S+\$") tokens = field_replacement_pattern.findall(detection.rba.message) + # TODO (cmcginley): could expand this to get the field values from the raw events and check + # to see that allexpected strings ARE in the risk message (as opposed to checking only + # that unexpected strings aren't) # Check for the presence of each token in the message from the risk event for token in tokens: if token in self.risk_message: @@ -249,10 +252,12 @@ def get_matched_risk_object(self, risk_objects: set[RiskObject]) -> RiskObject: # Try to match the risk_object against a specific risk object if self.source_field_name == risk_object.field: + # TODO (cmcginley): this should be enforced as part of build validation if matched_risk_object is not None: raise ValueError( - "Unexpected conditon: we don't expect the source event field " - "corresponding to an risk object's field name to be repeated." + "Unexpected conditon: we don't expect multiple risk objects to use the " + "same field name, so we should not be able match the risk event to " + "multiple risk objects." 
) # TODO (cmcginley): risk objects and threat objects are now in separate sets, From f72c7962801aaf3b94a45e848ec14dda5ebc0d43 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Jan 2025 14:45:18 -0800 Subject: [PATCH 3/4] resolving some todos --- contentctl/objects/correlation_search.py | 7 ++--- contentctl/objects/rba.py | 1 - contentctl/objects/risk_event.py | 37 +++++++++--------------- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index b8bc4249..9ce66a76 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -31,9 +31,8 @@ from contentctl.objects.notable_event import NotableEvent -# TODO (cmcginley): disable logging # Suppress logging by default; enable for local testing -ENABLE_LOGGING = True +ENABLE_LOGGING = False LOG_LEVEL = logging.DEBUG LOG_PATH = "correlation_search.log" @@ -665,7 +664,7 @@ def validate_risk_events(self) -> None: for event in events: c += 1 self.logger.debug( - f"Validating risk event ({event.risk_object}, {event.risk_object_type}): " + f"Validating risk event ({event.es_risk_object}, {event.es_risk_object_type}): " f"{c}/{len(events)}" ) event.validate_against_detection(self.detection) @@ -673,7 +672,7 @@ def validate_risk_events(self) -> None: # Update risk object count based on match matched_risk_object = event.get_matched_risk_object(self.detection.rba.risk_objects) self.logger.debug( - f"Matched risk event (object={event.risk_object}, type={event.risk_object_type}) " + f"Matched risk event (object={event.es_risk_object}, type={event.es_risk_object_type}) " f"to detection's risk object (name={matched_risk_object.field}, " f"type={matched_risk_object.type.value}) using the source field " f"'{event.source_field_name}'" diff --git a/contentctl/objects/rba.py b/contentctl/objects/rba.py index 3823ba8c..96294d9a 100644 --- a/contentctl/objects/rba.py +++ b/contentctl/objects/rba.py @@ -40,7 +40,6 @@ class ThreatObjectType(str, Enum): TLS_HASH = "tls_hash" URL = "url" -# TODO (cmcginley): class names should be capitalized class risk_object(BaseModel): field: str type: RiskObjectType diff --git a/contentctl/objects/risk_event.py b/contentctl/objects/risk_event.py index 5daa722b..24560d2b 100644 --- a/contentctl/objects/risk_event.py +++ b/contentctl/objects/risk_event.py @@ -7,7 +7,6 @@ from contentctl.objects.rba import risk_object as RiskObject -# TODO (cmcginley): the names below now collide a bit with Lou's new class names class RiskEvent(BaseModel): """Model for risk event in ES""" @@ -15,10 +14,12 @@ class RiskEvent(BaseModel): search_name: str # The subject of the risk event (e.g. a username, process name, system name, account ID, etc.) - risk_object: int | str + # (not to be confused w/ the risk object from the detection) + es_risk_object: int | str - # The type of the risk object (e.g. user, system, or other) - risk_object_type: str + # The type of the risk object from ES (e.g. user, system, or other) (not to be confused w/ + # the risk object from the detection) + es_risk_object_type: str # The level of risk associated w/ the risk event risk_score: int @@ -140,9 +141,6 @@ def validate_analyticstories(self, detection: Detection) -> None: f" in detection ({[x.name for x in detection.tags.analytic_story]})." 
) - # TODO (cmcginley): all of this type checking is a good use case (potentially) for subtyping - # detections by detection type, instead of having types as enums; could have an EBD subtype - # for any detections that should produce risk so that rba is never None def validate_risk_message(self, detection: Detection) -> None: """ Given the associated detection, validate the risk event's message @@ -160,7 +158,7 @@ def validate_risk_message(self, detection: Detection) -> None: field_replacement_pattern = re.compile(r"\$\S+\$") tokens = field_replacement_pattern.findall(detection.rba.message) - # TODO (cmcginley): could expand this to get the field values from the raw events and check + # TODO (#346): could expand this to get the field values from the raw events and check # to see that allexpected strings ARE in the risk message (as opposed to checking only # that unexpected strings aren't) # Check for the presence of each token in the message from the risk event @@ -210,12 +208,12 @@ def validate_risk_against_risk_objects(self, risk_objects: set[RiskObject]) -> N # The risk object type from the risk event should match our mapping of internal risk object # types - if self.risk_object_type != matched_risk_object.type.value: + if self.es_risk_object_type != matched_risk_object.type.value: raise ValidationFailed( - f"The risk object type from the risk event ({self.risk_object_type}) does not match" + f"The risk object type from the risk event ({self.es_risk_object_type}) does not match" " the expected type based on the matched risk object " - f"({matched_risk_object.type.value}): risk event=(object={self.risk_object}, " - f"type={self.risk_object_type}, source_field_name={self.source_field_name}), " + f"({matched_risk_object.type.value}): risk event=(object={self.es_risk_object}, " + f"type={self.es_risk_object_type}, source_field_name={self.source_field_name}), " f"risk object=(name={matched_risk_object.field}, " f"type={matched_risk_object.type.value})" ) @@ -252,7 +250,8 @@ def get_matched_risk_object(self, risk_objects: set[RiskObject]) -> RiskObject: # Try to match the risk_object against a specific risk object if self.source_field_name == risk_object.field: - # TODO (cmcginley): this should be enforced as part of build validation + # TODO (#347): enforce that field names are not repeated across risk objects as + # part of build/validate if matched_risk_object is not None: raise ValueError( "Unexpected conditon: we don't expect multiple risk objects to use the " @@ -260,22 +259,14 @@ def get_matched_risk_object(self, risk_objects: set[RiskObject]) -> RiskObject: "multiple risk objects." ) - # TODO (cmcginley): risk objects and threat objects are now in separate sets, - # so I think we can safely eliminate this check entirely? 
- # # Report any risk events we find that shouldn't be there - # if RiskEvent.ignore_observable(observable): - # raise ValidationFailed( - # "Risk event matched an observable with an invalid role: " - # f"(name={observable.name}, type={observable.type}, role={observable.role})") - # NOTE: we explicitly do not break early as we want to check each risk object matched_risk_object = risk_object # Ensure we were able to match the risk event to a specific risk object if matched_risk_object is None: raise ValidationFailed( - f"Unable to match risk event (object={self.risk_object}, type=" - f"{self.risk_object_type}, source_field_name={self.source_field_name}) to a " + f"Unable to match risk event (object={self.es_risk_object}, type=" + f"{self.es_risk_object_type}, source_field_name={self.source_field_name}) to a " "risk object in the detection; please check for errors in the risk object types for this " "detection, as well as the risk event build process in contentctl (e.g. threat " "objects aren't being converted to risk objects somehow)." From 51f078032bdfef6a22b799d951e204295f13ada2 Mon Sep 17 00:00:00 2001 From: Casey McGinley Date: Thu, 16 Jan 2025 14:54:00 -0800 Subject: [PATCH 4/4] new class name --- contentctl/objects/risk_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contentctl/objects/risk_event.py b/contentctl/objects/risk_event.py index 24560d2b..71ef3ed0 100644 --- a/contentctl/objects/risk_event.py +++ b/contentctl/objects/risk_event.py @@ -4,7 +4,7 @@ from pydantic import ConfigDict, BaseModel, Field, PrivateAttr, field_validator, computed_field from contentctl.objects.errors import ValidationFailed from contentctl.objects.detection import Detection -from contentctl.objects.rba import risk_object as RiskObject +from contentctl.objects.rba import RiskObject class RiskEvent(BaseModel):
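
Note for reviewers (not part of any commit above): the sketch below is an illustrative, self-contained summary of the risk-object matching that RiskEvent.get_matched_risk_object() performs against a detection's rba.risk_objects after this series. The names RiskObjectSketch and match_risk_object, and the example fields/scores, are made up for this note and are not contentctl APIs.

# Illustrative sketch only. A risk event is mapped to the detection risk object whose
# `field` equals the event's source field name; duplicates and misses are errors,
# mirroring (but not reproducing) the validation logic migrated in these patches.
from dataclasses import dataclass


@dataclass(frozen=True)
class RiskObjectSketch:  # hypothetical stand-in for contentctl.objects.rba.RiskObject
    field: str
    type: str
    score: int


def match_risk_object(
    source_field_name: str, risk_objects: set[RiskObjectSketch]
) -> RiskObjectSketch:
    matched: RiskObjectSketch | None = None
    for risk_object in risk_objects:
        if source_field_name == risk_object.field:
            if matched is not None:
                # Field names are expected to be unique across a detection's risk objects
                raise ValueError("multiple risk objects share the same field name")
            # Keep scanning rather than breaking early so duplicates are caught
            matched = risk_object
    if matched is None:
        raise ValueError(f"no risk object matches field '{source_field_name}'")
    return matched


# Example usage with made-up values:
objects = {RiskObjectSketch("user", "user", 64), RiskObjectSketch("dest", "system", 49)}
print(match_risk_object("dest", objects).score)  # -> 49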