Merge branch 'main' into main

akashverma0786 · Nov 26, 2024 · acb5aba · acb5aba
2 parents 4641a94 + f40138a
commit acb5aba
Show file tree

Hide file tree

Showing 264 changed files with 4,703 additions and 2,176 deletions.
diff --git a/.github/workflows/auto-cherry-pick-labeled-prs.yaml b/.github/workflows/auto-cherry-pick-labeled-prs.yaml
@@ -0,0 +1,76 @@
+---
+name: Cherry-pick labeled PRs to OpenMetadata release branch on merge
+# yamllint disable-line rule:comments
+run-name: OpenMetadata release cherry-pick PR #${{ github.event.pull_request.number }}
+
+# yamllint disable-line rule:truthy
+on:
+  pull_request:
+    types: [closed]
+    branches:
+      - main
+permissions:
+  contents: write
+  pull-requests: write
+env:
+  CURRENT_RELEASE_ENDPOINT: ${{ vars.CURRENT_RELEASE_ENDPOINT }}  # Endpoint that returns the current release version in json format
+jobs:
+  cherry_pick_to_release_branch:
+    if: github.event.pull_request.merged == true &&
+        contains(github.event.pull_request.labels.*.name, 'To release')
+    runs-on: ubuntu-latest  # Running it on ubuntu-latest on purpose (we're not using all the free minutes)
+    steps:
+      - name: Checkout main branch
+        uses: actions/checkout@v4
+        with:
+          ref: main
+          fetch-depth: 0
+      - name: Get the release version
+        id: get_release_version
+        run: |
+          CURRENT_RELEASE=$(curl -fsS "$CURRENT_RELEASE_ENDPOINT" | jq -r .om_branch)
+          echo "CURRENT_RELEASE=${CURRENT_RELEASE}" >> "$GITHUB_ENV"
+      - name: Cherry-pick changes from PR
+        id: cherry_pick
+        continue-on-error: true  # outcome is reported by the comment steps below
+        run: |
+          git config --global user.email "[email protected]"
+          git config --global user.name "OpenMetadata Release Bot"
+          git fetch origin "${CURRENT_RELEASE}"
+          git checkout "${CURRENT_RELEASE}"
+          git cherry-pick -x ${{ github.event.pull_request.merge_commit_sha }}
+      - name: Push changes to release branch
+        id: push_changes
+        continue-on-error: true  # outcome is reported by the comment steps below
+        if: steps.cherry_pick.outcome == 'success'
+        run: |
+          git push origin "${CURRENT_RELEASE}"
+      - name: Post a comment on failure
+        if: steps.cherry_pick.outcome != 'success' || steps.push_changes.outcome != 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const releaseVersion = process.env.CURRENT_RELEASE;
+            const workflowRunUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: `Failed to cherry-pick changes to the ${releaseVersion} branch.
+                Please cherry-pick the changes manually.
+                You can find more details [here](${workflowRunUrl}).`
+            })
+      - name: Post a comment on success
+        if: steps.cherry_pick.outcome == 'success' && steps.push_changes.outcome == 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const releaseVersion = process.env.CURRENT_RELEASE;
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: `Changes have been cherry-picked to the ${releaseVersion} branch.`
+            })
diff --git a/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java b/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java
@@ -24,22 +24,20 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.text.DateFormat;
-import java.text.ParseException;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Base64;
-import java.util.Calendar;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Date;
 import java.util.Enumeration;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
-import java.util.TimeZone;
 import java.util.UUID;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -121,42 +119,42 @@ public static Collection<String> getResourcesFromDirectory(File file, Pattern pa
   }
 
   /** Get date after {@code days} from the given date or before i{@code days} when it is negative */
-  public static Date getDateByOffset(Date date, int days) {
-    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
-    calendar.setTime(date);
-    calendar.add(Calendar.DATE, days);
-    return calendar.getTime();
+  public static LocalDate getDateByOffset(LocalDate localDate, int days) {
+    return localDate.plusDays(days);
   }
 
   /** Get date after {@code days} from the given date or before i{@code days} when it is negative */
-  public static Date getDateByOffset(DateFormat dateFormat, String strDate, int days) {
-    Date date;
+  public static LocalDate getDateByOffset(DateTimeFormatter dateFormat, String strDate, int days) {
+    LocalDate localDate;
     try {
-      date = dateFormat.parse(strDate);
-    } catch (ParseException e) {
+      localDate = LocalDate.parse(strDate, dateFormat);
+    } catch (DateTimeParseException e) {
       throw new IllegalArgumentException("Failed to parse date " + strDate, e);
     }
-    return getDateByOffset(date, days);
+    return getDateByOffset(localDate, days);
   }
 
   /** Get date after {@code days} from the given date or before i{@code days} when it is negative */
-  public static String getDateStringByOffset(DateFormat dateFormat, String strDate, int days) {
-    return dateFormat.format(getDateByOffset(dateFormat, strDate, days));
+  public static String getDateStringByOffset(
+      DateTimeFormatter dateFormat, String strDate, int days) {
+    LocalDate localDate = getDateByOffset(dateFormat, strDate, days);
+    return localDate.format(dateFormat);
   }
 
   /** Check if given date is with in today - pastDays and today + futureDays */
   public static boolean dateInRange(
-      DateFormat dateFormat, String date, int futureDays, int pastDays) {
-    Date today = new Date();
-    Date startDate = getDateByOffset(today, -pastDays);
-    Date endDate = getDateByOffset(today, futureDays);
-    Date givenDate;
+      DateTimeFormatter dateFormat, String date, int futureDays, int pastDays) {
+    LocalDate today = LocalDate.now();
+    LocalDate startDate = getDateByOffset(today, -pastDays);
+    LocalDate endDate = getDateByOffset(today, futureDays);
+    LocalDate givenDate;
     try {
-      givenDate = dateFormat.parse(date);
-    } catch (ParseException e) {
+      givenDate = LocalDate.parse(date, dateFormat);
+    } catch (DateTimeParseException e) {
       throw new IllegalArgumentException("Failed to parse date " + date, e);
     }
-    return givenDate.after(startDate) && givenDate.before(endDate);
+    // Inclusive on both ends: neither before the start nor after the end
+    return !givenDate.isBefore(startDate) && !givenDate.isAfter(endDate);
   }
 
   public static final String HMAC_SHA256_ALGORITHM = "HmacSHA256";

diff --git a/ingestion/examples/sample_data/looker/dashboardDataModels.json b/ingestion/examples/sample_data/looker/dashboardDataModels.json
@@ -158,51 +158,6 @@
         "ordinalPosition": 5
       }
     ]
-  },
-  {
-    "name": "\"orders_view\" || \"operations_view\"",
-    "displayName": "Orders View and Operations View",
-    "description": "Orders View and Operations View from Sample Data",
-    "dataModelType":  "LookMlView",
-    "serviceType": "Looker",
-    "sql":  "view: orders {\n  sql_table_name: orders ;;\n\n  dimension: \"1. Phase I\" {\n    type: string\n    sql: ${TABLE}.status ;;\n  }\n\n  dimension: \"4. Authorized\" {\n    type: int\n    sql: ${TABLE}.amount ;;\n  }\n}",
-    "columns": [
-      {
-        "name": "0. Pre-clinical",
-        "dataType": "NUMERIC",
-        "dataTypeDisplay": "numeric",
-        "description": "Vaccine Candidates in phase: 'Pre-clinical'",
-        "ordinalPosition": 1
-      },
-      {
-        "name": "2. Phase II or Combined I/II",
-        "dataType": "NUMERIC",
-        "dataTypeDisplay": "numeric",
-        "description": "Vaccine Candidates in phase: 'Phase II or Combined I/II'",
-        "ordinalPosition": 2
-      },
-      {
-        "name": "1. Phase I",
-        "dataType": "NUMERIC",
-        "dataTypeDisplay": "numeric",
-        "description": "Vaccine Candidates in phase: 'Phase I'",
-        "ordinalPosition": 3
-      },
-      {
-        "name": "3. Phase III",
-        "dataType": "NUMERIC",
-        "dataTypeDisplay": "numeric",
-        "description": "Vaccine Candidates in phase: 'Phase III'",
-        "ordinalPosition": 4
-      },
-      {
-        "name": "4. Authorized",
-        "dataType": "NUMERIC",
-        "dataTypeDisplay": "numeric",
-        "description": "Vaccine Candidates in phase: 'Authorize'",
-        "ordinalPosition": 5
-      }
-    ]
   }
 ]
 
diff --git a/ingestion/pyproject.toml b/ingestion/pyproject.toml
@@ -280,3 +280,4 @@ ignore = [
 reportDeprecated = false
 reportMissingTypeStubs = false
 reportAny = false
+reportExplicitAny = false
diff --git a/ingestion/setup.py b/ingestion/setup.py
@@ -304,7 +304,6 @@
         "psycopg2-binary",
         VERSIONS["geoalchemy2"],
     },
-    "mstr": {"mstr-rest-requests==0.14.1"},
     "sagemaker": {VERSIONS["boto3"]},
     "salesforce": {"simple_salesforce~=1.11"},
     "sample-data": {VERSIONS["avro"], VERSIONS["grpc-tools"]},

diff --git a/ingestion/src/metadata/data_quality/builders/i_validator_builder.py b/ingestion/src/metadata/data_quality/builders/i_validator_builder.py
@@ -16,7 +16,7 @@
 
 from abc import ABC, abstractmethod
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Optional, Type, Union
+from typing import TYPE_CHECKING, Set, Type, Union
 
 from metadata.data_quality.validations.base_test_handler import BaseTestValidator
 from metadata.data_quality.validations.runtime_param_setter.param_setter import (
@@ -66,24 +66,20 @@ def __init__(
         )
         self.reset()
 
-    def set_runtime_params(
-        self, runtime_params_setter: Optional[RuntimeParameterSetter]
-    ):
+    def set_runtime_params(self, runtime_params_setters: Set[RuntimeParameterSetter]):
         """Set the runtime parameters for the validator object
 
-        # TODO: We should support setting n runtime parameters
-
         Args:
-            runtime_params_setter (Optional[RuntimeParameterSetter]): The runtime parameter setter
+            runtime_params_setters (Set[RuntimeParameterSetter]): The runtime parameter setters
         """
-        if runtime_params_setter:
-            params = runtime_params_setter.get_parameters(self.test_case)
+        for setter in runtime_params_setters:
+            params = setter.get_parameters(self.test_case)
             if not self.test_case.parameterValues:
                 # If there are no parameters, create a new list
                 self.test_case.parameterValues = []
             self.test_case.parameterValues.append(
                 TestCaseParameterValue(
-                    name="runtimeParams", value=params.model_dump_json()
+                    name=type(params).__name__, value=params.model_dump_json()
                 )
             )
 

diff --git a/ingestion/src/metadata/data_quality/interface/test_suite_interface.py b/ingestion/src/metadata/data_quality/interface/test_suite_interface.py
@@ -15,7 +15,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Optional, Type
+from typing import Optional, Set, Type
 
 from metadata.data_quality.builders.i_validator_builder import IValidatorBuilder
 from metadata.data_quality.validations.base_test_handler import BaseTestValidator
@@ -111,9 +111,9 @@ def run_test_case(self, test_case: TestCase) -> Optional[TestCaseResult]:
         runtime_params_setter_fact: RuntimeParameterSetterFactory = (
             self._get_runtime_params_setter_fact()
         )  # type: ignore
-        runtime_params_setter: Optional[
+        runtime_params_setters: Set[
             RuntimeParameterSetter
-        ] = runtime_params_setter_fact.get_runtime_param_setter(
+        ] = runtime_params_setter_fact.get_runtime_param_setters(
             test_case.testDefinition.fullyQualifiedName,  # type: ignore
             self.ometa_client,
             self.service_connection_config,
@@ -127,7 +127,7 @@ def run_test_case(self, test_case: TestCase) -> Optional[TestCaseResult]:
         ).entityType.value
 
         validator_builder = self._get_validator_builder(test_case, entity_type)
-        validator_builder.set_runtime_params(runtime_params_setter)
+        validator_builder.set_runtime_params(runtime_params_setters)
         validator: BaseTestValidator = validator_builder.validator
         try:
             return validator.run_validation()

diff --git a/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py b/ingestion/src/metadata/data_quality/runner/base_test_suite_source.py
@@ -112,10 +112,14 @@ def create_data_quality_interface(self) -> TestSuiteInterface:
             entity=self.entity, metadata=self.ometa_client
         )
         test_suite_class = import_test_suite_class(
-            ServiceType.Database, source_type=self._interface_type
+            ServiceType.Database,
+            source_type=self._interface_type,
+            source_config_type=self.service_conn_config.type.value,
         )
         sampler_class = import_sampler_class(
-            ServiceType.Database, source_type=self._interface_type
+            ServiceType.Database,
+            source_type=self._interface_type,
+            source_config_type=self.service_conn_config.type.value,
         )
         # This is shared between the sampler and DQ interfaces
         _orm = self._build_table_orm(self.entity)

diff --git a/ingestion/src/metadata/data_quality/validations/base_test_handler.py b/ingestion/src/metadata/data_quality/validations/base_test_handler.py
@@ -19,10 +19,9 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Callable, List, Optional, Type, TypeVar, Union
 
+from pydantic import BaseModel
+
 from metadata.data_quality.validations import utils
-from metadata.data_quality.validations.runtime_param_setter.param_setter import (
-    RuntimeParameterSetter,
-)
 from metadata.generated.schema.tests.basic import (
     TestCaseResult,
     TestCaseStatus,
@@ -37,6 +36,7 @@
 
 T = TypeVar("T", bound=Callable)
 R = TypeVar("R")
+S = TypeVar("S", bound=BaseModel)
 
 
 class BaseTestValidator(ABC):
@@ -45,8 +45,6 @@ class BaseTestValidator(ABC):
     This can be useful to resolve complex test parameters based on the parameters gibven by the user.
     """
 
-    runtime_parameter_setter: Optional[Type[RuntimeParameterSetter]] = None
-
     def __init__(
         self,
         runner: Union[QueryRunner, List["DataFrame"]],
@@ -168,3 +166,10 @@ def get_max_bound(self, param_name: str) -> Optional[float]:
     def get_predicted_value(self) -> Optional[str]:
         """Get predicted value"""
         return None
+
+    def get_runtime_parameters(self, setter_class: Type[S]) -> S:
+        """Parse the parameter named after ``setter_class`` into that model"""
+        for param in self.test_case.parameterValues or []:  # may be unset
+            if param.name == setter_class.__name__:
+                return setter_class.model_validate_json(param.value)
+        raise ValueError(f"Runtime parameter {setter_class.__name__} not found")