Skip to content

Commit

Permalink
chore(iast): use allowlist for selecting modules to patch (#11946)
Browse files Browse the repository at this point in the history
## Description

Switch from a denylist for patching IAST modules to an allowlist. 

Signed-off-by: Juanjo Alvarez <[email protected]>

## Checklist
- [X] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Signed-off-by: Juanjo Alvarez <[email protected]>
  • Loading branch information
juanjux authored Jan 17, 2025
1 parent b1479cd commit 26dc258
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 190 deletions.
227 changes: 137 additions & 90 deletions ddtrace/appsec/_iast/_ast/ast_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from types import ModuleType
from typing import Iterable
from typing import Optional
from typing import Set
from typing import Text
from typing import Tuple

Expand All @@ -26,57 +27,43 @@
_PREFIX = IAST.PATCH_ADDED_SYMBOL_PREFIX

# Prefixes for modules where IAST patching is allowed
IAST_ALLOWLIST: Tuple[Text, ...] = ("tests.appsec.iast.",)
# Only packages that have test_propagation=True in test_packages and are not in the denylist belong here.
# Every prefix must end with "." so that it only matches on a package boundary: _should_iast_patch looks up
# the lower-cased module name with a "." appended, so "tomli." covers "tomli" and "tomli.sub" but not
# "tomli_w" (assumes _trie_has_prefix_for does plain prefix matching -- TODO confirm).
IAST_ALLOWLIST: Tuple[Text, ...] = (
    "attrs.",
    "beautifulsoup4.",
    "cachetools.",
    "cryptography.",
    "docutils.",
    "idna.",
    "iniconfig.",
    "jinja2.",
    "lxml.",
    "multidict.",
    "platformdirs.",
    "pygments.",
    "pynacl.",
    "pyparsing.",
    "multipart.",
    "sqlalchemy.",
    "tomli.",
    "yarl.",
)

# NOTE: For testing reasons, don't add astunparse here, see test_ast_patching.py
IAST_DENYLIST: Tuple[Text, ...] = (
"altgraph.",
"dipy.",
"black.",
"mypy.",
"mypy_extensions.",
"autopep8.",
"pycodestyle.",
"pydicom.",
"pyinstaller.",
"pystray.",
"contourpy.",
"cx_logging.",
"dateutil.",
"pytz.",
"wcwidth.",
"win32ctypes.",
"xlib.",
"cycler.",
"cython.",
"dnspython.",
"elasticdeform.",
"numpy.",
"matplotlib.",
"skbase.",
"scipy.",
"networkx.",
"imageio.",
"fonttools.",
"nibabel.",
"nilearn.",
"gprof2dot.",
"h5py.",
"kiwisolver.",
"pandas.",
"pdf2image.",
"pefile.",
"pil.",
"threadpoolctl.",
"tifffile.",
"tqdm.",
"trx.",
"flask.",
"werkzeug.",
"_psycopg.", # PostgreSQL adapter for Python (v3)
"_pytest.",
"aiohttp._helpers.",
"aiohttp._http_parser.",
"aiohttp._http_writer.",
"aiohttp._websocket.",
"aiohttp.log.",
"aiohttp.tcp_helpers.",
"aioquic.",
"altgraph.",
"anyio.",
"api_pb2.", # Patching crashes with these auto-generated modules, propagation is not needed
"api_pb2_grpc.", # Patching crashes with these auto-generated modules, propagation is not needed
"asyncio.base_events.",
"asyncio.base_futures.",
"asyncio.base_subprocess.",
Expand All @@ -99,18 +86,24 @@
"asyncio.transports.",
"asyncio.trsock.",
"asyncio.unix_events.",
"asyncpg.pgproto.",
"attr._config.",
"attr._next_gen.",
"attr.filters.",
"attr.setters.",
"autopep8.",
"backports.",
"black.",
"blinker.",
"boto3.docs.docstring.",
"boto3.s3.",
"botocore.docs.bcdoc.",
"botocore.retries.",
"botocore.vendored.requests.",
"brotli.",
"brotlicffi.",
"bytecode.",
"cattrs.",
"cchardet.",
"certifi.",
"cffi.",
Expand Down Expand Up @@ -145,14 +138,23 @@
"colorama.",
"concurrent.futures.",
"configparser.",
"contourpy.",
"coreschema.",
"crispy_forms.",
"crypto.", # This module is patched by the IAST patch methods, propagation is not needed
"cx_logging.",
"cycler.",
"cython.",
"dateutil.",
"dateutil.",
"ddsketch.",
"ddtrace.",
"defusedxml.",
"deprecated.",
"difflib.",
"dill.info.",
"dill.settings.",
"silk.", # django-silk package
"dipy.",
"django.apps.config.",
"django.apps.registry.",
"django.conf.",
Expand Down Expand Up @@ -298,72 +300,87 @@
"django_filters.rest_framework.filterset.",
"django_filters.utils.",
"django_filters.widgets.",
"crypto.", # This module is patched by the IAST patch methods, propagation is not needed
"deprecated.",
"api_pb2.", # Patching crashes with these auto-generated modules, propagation is not needed
"api_pb2_grpc.", # Patching crashes with these auto-generated modules, propagation is not needed
"asyncpg.pgproto.",
"blinker.",
"bytecode.",
"cattrs.",
"ddsketch.",
"ddtrace.",
"dnspython.",
"elasticdeform.",
"envier.",
"exceptiongroup.",
"flask.",
"fonttools.",
"freezegun.", # Testing utilities for time manipulation
"google.auth.",
"googlecloudsdk.",
"gprof2dot.",
"h11.",
"h5py.",
"httpcore.",
"httptools.",
"httpx.",
"hypothesis.", # Testing utilities
"imageio.",
"importlib_metadata.",
"inspect.", # this package is used to get the stack frames, propagation is not needed
"itsdangerous.",
"kiwisolver.",
"matplotlib.",
"moto.", # used for mocking AWS, propagation is not needed
"mypy.",
"mypy_extensions.",
"networkx.",
"nibabel.",
"nilearn.",
"numba.",
"numpy.",
"opentelemetry-api.",
"packaging.",
"pandas.",
"pdf2image.",
"pefile.",
"pil.",
"pip.",
"pkg_resources.",
"pluggy.",
"protobuf.",
"psycopg.", # PostgreSQL adapter for Python (v3)
"_psycopg.", # PostgreSQL adapter for Python (v3)
"psycopg2.", # PostgreSQL adapter for Python (v2)
"pycodestyle.",
"pycparser.", # this package is called when a module is imported, propagation is not needed
"pydicom.",
"pyinstaller.",
"pynndescent.",
"pystray.",
"pytest.", # Testing framework
"_pytest.",
"pytz.",
"rich.",
"sanic.",
"scipy.",
"setuptools.",
"silk.", # django-silk package
"skbase.",
"sklearn.", # Machine learning library
"sniffio.",
"sqlalchemy.orm.interfaces.", # Performance optimization
"threadpoolctl.",
"tifffile.",
"tqdm.",
"trx.",
"typing_extensions.",
"umap.",
"unittest.mock.",
"uvloop.",
"urlpatterns_reverse.tests.", # assertRaises eat exceptions in native code, so we don't call the original function
"wrapt.",
"zipp.",
# This is a workaround for Sanic failures:
"uvicorn.",
"uvloop.",
"wcwidth.",
"websocket.",
"h11.",
"aioquic.",
"httptools.",
"sniffio.",
"sanic.",
"rich.",
"httpx.",
"websockets.",
"uvicorn.",
"anyio.",
"httpcore.",
"google.auth.",
"googlecloudsdk.",
"umap.",
"pynndescent.",
"numba.",
"werkzeug.",
"win32ctypes.",
"wrapt.",
"xlib.",
"zipp.",
)


# Append user-supplied prefixes to the built-in lists. The values come from the environment variables
# named by IAST.PATCH_MODULES / IAST.DENY_MODULES and are split on IAST.SEP_MODULES.
if IAST.PATCH_MODULES in os.environ:
IAST_ALLOWLIST += tuple(os.environ[IAST.PATCH_MODULES].split(IAST.SEP_MODULES))

if IAST.DENY_MODULES in os.environ:
IAST_DENYLIST += tuple(os.environ[IAST.DENY_MODULES].split(IAST.SEP_MODULES))

# The same user-supplied prefixes, kept as standalone tuples.
# NOTE(review): when a variable is unset the default "" is split, which (assuming IAST.SEP_MODULES is a
# non-empty string) yields ("",) rather than an empty tuple -- confirm consumers tolerate the "" entry.
USER_ALLOWLIST = tuple(os.environ.get(IAST.PATCH_MODULES, "").split(IAST.SEP_MODULES))
USER_DENYLIST = tuple(os.environ.get(IAST.DENY_MODULES, "").split(IAST.SEP_MODULES))

ENCODING = ""

Expand Down Expand Up @@ -399,6 +416,8 @@ def build_trie(words: Iterable[str]) -> _TrieNode:

# One trie per prefix list, built once at module level; each is later passed to _trie_has_prefix_for.
_TRIE_ALLOWLIST = build_trie(IAST_ALLOWLIST)
_TRIE_DENYLIST = build_trie(IAST_DENYLIST)
_TRIE_USER_ALLOWLIST = build_trie(USER_ALLOWLIST)
_TRIE_USER_DENYLIST = build_trie(USER_DENYLIST)


def _trie_has_prefix_for(trie: _TrieNode, string: str) -> bool:
Expand Down Expand Up @@ -429,11 +448,26 @@ def get_encoding(module_path: Text) -> Text:

# Lower-cased union of _stdlib_for_python_version() and builtin_module_names; _in_python_stdlib tests
# the top-level component of a module name against this set.
_NOT_PATCH_MODULE_NAMES = {i.lower() for i in _stdlib_for_python_version() | set(builtin_module_names)}

# Starts empty; _is_first_party fills it from get_package_distributions() the first time it needs it.
_IMPORTLIB_PACKAGES: Set[str] = set()


def _in_python_stdlib(module_name: str) -> bool:
    """Return True when the top-level component of ``module_name`` is in ``_NOT_PATCH_MODULE_NAMES``.

    The comparison is done on the lower-cased text before the first ".".
    """
    top_level, _, _ = module_name.partition(".")
    return top_level.lower() in _NOT_PATCH_MODULE_NAMES


def _is_first_party(module_name: str):
    """Return True when ``module_name`` does not belong to an installed distribution.

    Names containing "vendor." or "vendored." are never treated as first party.
    Otherwise the text before the first "." is looked up in ``_IMPORTLIB_PACKAGES``,
    which is filled from ``get_package_distributions()`` while it is still empty.
    """
    global _IMPORTLIB_PACKAGES

    # Vendored copies of third-party code are rejected before any lookup.
    for marker in ("vendor.", "vendored."):
        if marker in module_name:
            return False

    if len(_IMPORTLIB_PACKAGES) == 0:
        # Imported here rather than at module top, as in the original code.
        from ddtrace.internal.packages import get_package_distributions

        _IMPORTLIB_PACKAGES = set(get_package_distributions())

    top_level = module_name.partition(".")[0]
    return top_level not in _IMPORTLIB_PACKAGES


def _should_iast_patch(module_name: Text) -> bool:
"""
select if module_name should be patch from the longest prefix that match in allow or deny list.
Expand All @@ -444,17 +478,30 @@ def _should_iast_patch(module_name: Text) -> bool:
# max_deny = max((len(prefix) for prefix in IAST_DENYLIST if module_name.startswith(prefix)), default=-1)
# diff = max_allow - max_deny
# return diff > 0 or (diff == 0 and not _in_python_stdlib_or_third_party(module_name))
if _in_python_stdlib(module_name):
log.debug("IAST: denying %s. it's in the _in_python_stdlib", module_name)
return False

if _is_first_party(module_name):
return True

# else: third party. Check that is in the allow list and not in the deny list
dotted_module_name = module_name.lower() + "."

# User allow or deny list set by env var have priority
if _trie_has_prefix_for(_TRIE_USER_ALLOWLIST, dotted_module_name):
return True

if _trie_has_prefix_for(_TRIE_USER_DENYLIST, dotted_module_name):
return False

if _trie_has_prefix_for(_TRIE_ALLOWLIST, dotted_module_name):
if _trie_has_prefix_for(_TRIE_DENYLIST, dotted_module_name):
return False
log.debug("IAST: allowing %s. it's in the IAST_ALLOWLIST", module_name)
return True
if _trie_has_prefix_for(_TRIE_DENYLIST, dotted_module_name):
log.debug("IAST: denying %s. it's in the IAST_DENYLIST", module_name)
return False
if _in_python_stdlib(module_name):
log.debug("IAST: denying %s. it's in the _in_python_stdlib", module_name)
return False
return True
log.debug("IAST: denying %s. it's in the IAST_DENYLIST", module_name)
return False


def visit_ast(
Expand Down
Loading

0 comments on commit 26dc258

Please sign in to comment.