From 457fd68006d22eee338daabeafee8f74b4c3aac8 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Thu, 7 Mar 2024 15:37:28 +0100 Subject: [PATCH] Scrub secret vars - Scrub secret vars in RequiredVarNotFoundError - Scrub secret vars in StateCheckVarsHash event - Scrub secret vars in run results --- .../unreleased/Features-20240307-153622.yaml | 6 ++ core/dbt/artifacts/schemas/run/v5/run.py | 24 +++++- core/dbt/exceptions.py | 7 +- core/dbt/parser/manifest.py | 7 +- .../context_methods/test_cli_vars.py | 81 ++++++++++++++++++- tests/unit/test_parse_manifest.py | 2 +- 6 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 .changes/unreleased/Features-20240307-153622.yaml diff --git a/.changes/unreleased/Features-20240307-153622.yaml b/.changes/unreleased/Features-20240307-153622.yaml new file mode 100644 index 00000000000..80886a82c9b --- /dev/null +++ b/.changes/unreleased/Features-20240307-153622.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support scrubbing secret vars +time: 2024-03-07T15:36:22.754627+01:00 +custom: + Author: nielspardon + Issue: "7247" diff --git a/core/dbt/artifacts/schemas/run/v5/run.py b/core/dbt/artifacts/schemas/run/v5/run.py index 82ef5232c9c..47cc0cb3b87 100644 --- a/core/dbt/artifacts/schemas/run/v5/run.py +++ b/core/dbt/artifacts/schemas/run/v5/run.py @@ -1,9 +1,11 @@ import threading from typing import Any, Optional, Iterable, Tuple, Sequence, Dict, TYPE_CHECKING +import copy from dataclasses import dataclass, field from datetime import datetime +from dbt.constants import SECRET_ENV_PREFIX from dbt.artifacts.resources import CompiledResource from dbt.artifacts.schemas.base import ( BaseArtifactMetadata, @@ -19,6 +21,7 @@ ExecutionResult, ) from dbt_common.clients.system import write_json +from dbt.exceptions import scrub_secrets if TYPE_CHECKING: @@ -123,7 +126,26 @@ def from_execution_results( dbt_schema_version=str(cls.dbt_schema_version), generated_at=generated_at, ) - return cls(metadata=meta, results=processed_results, elapsed_time=elapsed_time, args=args) + + secret_vars = [ + v for k, v in args["vars"].items() if k.startswith(SECRET_ENV_PREFIX) and v.strip() + ] + + scrubbed_args = copy.deepcopy(args) + + # scrub secrets in invocation command + scrubbed_args["invocation_command"] = scrub_secrets( + scrubbed_args["invocation_command"], secret_vars + ) + + # scrub secrets in vars dict + scrubbed_args["vars"] = { + k: scrub_secrets(v, secret_vars) for k, v in scrubbed_args["vars"].items() + } + + return cls( + metadata=meta, results=processed_results, elapsed_time=elapsed_time, args=scrubbed_args + ) @classmethod def compatible_previous_versions(cls) -> Iterable[Tuple[str, int]]: diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 3f3406b43f2..721e65b4c27 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -17,6 +17,8 @@ from dbt_common.dataclass_schema import ValidationError +from dbt.constants import SECRET_ENV_PREFIX + if TYPE_CHECKING: import agate @@ -333,7 +335,10 @@ def get_message(self) -> str: pretty_vars = json.dumps(dct, sort_keys=True, indent=4) msg = f"Required var '{self.var_name}' not found in config:\nVars supplied to {node_name} = {pretty_vars}" - return msg + return scrub_secrets(msg, self.var_secrets()) + + def var_secrets(self) -> List[str]: + return [v for k, v in self.merged.items() if k.startswith(SECRET_ENV_PREFIX) and v.strip()] class PackageNotFoundForMacroError(CompilationError): diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 03809635620..a0cc49faa20 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -44,6 +44,7 @@ MANIFEST_FILE_NAME, PARTIAL_PARSE_FILE_NAME, SEMANTIC_MANIFEST_FILE_NAME, + SECRET_ENV_PREFIX, ) from dbt_common.helper_types import PathSet from dbt_common.events.functions import fire_event, get_invocation_id, warn_or_error @@ -116,6 +117,7 @@ TargetNotFoundError, AmbiguousAliasError, InvalidAccessTypeError, + scrub_secrets, ) from dbt.parser.base import Parser from dbt.parser.analysis import AnalysisParser @@ -989,6 +991,9 @@ def build_manifest_state_check(self): # of env_vars, that would need to change. # We are using the parsed cli_vars instead of config.args.vars, in order # to sort them and avoid reparsing because of ordering issues. + secret_vars = [ + v for k, v in config.cli_vars.items() if k.startswith(SECRET_ENV_PREFIX) and v.strip() + ] stringified_cli_vars = pprint.pformat(config.cli_vars) vars_hash = FileHash.from_contents( "\x00".join( @@ -1003,7 +1008,7 @@ def build_manifest_state_check(self): fire_event( StateCheckVarsHash( checksum=vars_hash.checksum, - vars=stringified_cli_vars, + vars=scrub_secrets(stringified_cli_vars, secret_vars), profile=config.args.profile, target=config.args.target, version=__version__, diff --git a/tests/functional/context_methods/test_cli_vars.py b/tests/functional/context_methods/test_cli_vars.py index d3d5dfc8197..1d72e8c5021 100644 --- a/tests/functional/context_methods/test_cli_vars.py +++ b/tests/functional/context_methods/test_cli_vars.py @@ -3,7 +3,13 @@ from tests.fixtures.dbt_integration_project import dbt_integration_project # noqa: F401 -from dbt.tests.util import run_dbt, get_artifact, write_config_file +from dbt.tests.util import ( + run_dbt, + run_dbt_and_capture, + get_logging_events, + get_artifact, + write_config_file, +) from dbt.tests.fixtures.project import write_project_files from dbt.exceptions import DbtRuntimeError, CompilationError @@ -206,3 +212,76 @@ def test_vars_in_selectors(self, project): # Var in cli_vars works results = run_dbt(["run", "--vars", "snapshot_target: dev"]) assert len(results) == 1 + + +models_scrubbing__schema_yml = """ +version: 2 +models: +- name: simple_model + columns: + - name: simple + data_tests: + - accepted_values: + values: + - abc +""" + +models_scrubbing__simple_model_sql = """ +select + '{{ var("DBT_ENV_SECRET_simple") }}'::varchar as simple +""" + + +class TestCLIVarsScrubbing: + @pytest.fixture(scope="class") + def models(self): + return { + "schema.yml": models_scrubbing__schema_yml, + "simple_model.sql": models_scrubbing__simple_model_sql, + } + + def test__run_results_scrubbing(self, project): + results, output = run_dbt_and_capture( + [ + "--debug", + "--log-format", + "json", + "run", + "--vars", + "{DBT_ENV_SECRET_simple: abc, unused: def}", + ] + ) + assert len(results) == 1 + + run_results = get_artifact(project.project_root, "target", "run_results.json") + assert run_results["args"]["vars"] == { + "DBT_ENV_SECRET_simple": "*****", + "unused": "def", + } + + log_events = get_logging_events(log_output=output, event_name="StateCheckVarsHash") + assert len(log_events) == 1 + assert ( + log_events[0]["data"]["vars"] == "{'DBT_ENV_SECRET_simple': '*****', 'unused': 'def'}" + ) + + def test__exception_scrubbing(self, project): + results, output = run_dbt_and_capture( + [ + "--debug", + "--log-format", + "json", + "run", + "--vars", + "{DBT_ENV_SECRET_unused: abc, unused: def}", + ], + False, + ) + assert len(results) == 1 + + log_events = get_logging_events(log_output=output, event_name="CatchableExceptionOnRun") + assert len(log_events) == 1 + assert ( + '{\n "DBT_ENV_SECRET_unused": "*****",\n "unused": "def"\n }' + in log_events[0]["info"]["msg"] + ) diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index 23d6d51446d..7471b399acd 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -108,7 +108,7 @@ def _new_file(self, searched, name, match): class TestPartialParse(unittest.TestCase): def setUp(self) -> None: mock_project = MagicMock(RuntimeConfig) - mock_project.cli_vars = "" + mock_project.cli_vars = {} mock_project.args = MagicMock() mock_project.args.profile = "test" mock_project.args.target = "test"